| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s |
| ; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD |
| |
| @n = external local_unnamed_addr global i32, align 4 |
| @arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16 |
| @arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16 |
| @res = external local_unnamed_addr global float, align 4 |
| |
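; Reduction of four arr[i]*arr1[i] products where each product feeds two fadd chains
; seeded by %conv; only the multiplies are vectorized (as <2 x float> pairs), the
; reduction itself stays scalar.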
| define float @baz() { |
| ; CHECK-LABEL: @baz( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3 |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 |
| ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8 |
| ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8 |
| ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 |
| ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 |
| ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]] |
| ; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]] |
| ; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]] |
| ; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]] |
| ; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]] |
| ; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]] |
| ; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4 |
| ; CHECK-NEXT: ret float [[ADD19_3]] |
| ; |
| ; THRESHOLD-LABEL: @baz( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3 |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]] |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 |
| ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]] |
| ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 |
| ; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]] |
| ; THRESHOLD-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8 |
| ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8 |
| ; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]] |
| ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 |
| ; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]] |
| ; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 |
| ; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]] |
| ; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]] |
| ; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]] |
| ; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]] |
| ; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]] |
| ; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]] |
| ; THRESHOLD-NEXT: store float [[ADD19_3]], float* @res, align 4 |
| ; THRESHOLD-NEXT: ret float [[ADD19_3]] |
| ; |
| entry: |
| %0 = load i32, i32* @n, align 4 |
| %mul = mul nsw i32 %0, 3 |
| %conv = sitofp i32 %mul to float |
| %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 |
| %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16 |
| %mul4 = fmul fast float %2, %1 |
| %add = fadd fast float %mul4, %conv |
| %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4 |
| %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4 |
| %mul4.1 = fmul fast float %4, %3 |
| %add.1 = fadd fast float %mul4.1, %add |
| %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 |
| %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 |
| %mul4.2 = fmul fast float %6, %5 |
| %add.2 = fadd fast float %mul4.2, %add.1 |
| %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 |
| %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 |
| %mul4.3 = fmul fast float %8, %7 |
| %add.3 = fadd fast float %mul4.3, %add.2 |
| %add7 = fadd fast float %add.3, %conv |
| %add19 = fadd fast float %mul4, %add7 |
| %add19.1 = fadd fast float %mul4.1, %add19 |
| %add19.2 = fadd fast float %mul4.2, %add19.1 |
| %add19.3 = fadd fast float %mul4.3, %add19.2 |
| store float %add19.3, float* @res, align 4 |
| ret float %add19.3 |
| } |
| |
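; 8-product fadd reduction with two extra scalar operands (%conv and %conv6); this is
; vectorized into a single <8 x float> fmul feeding llvm.vector.reduce.fadd.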
| define float @bazz() { |
| ; CHECK-LABEL: @bazz( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3 |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] |
| ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 |
| ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float |
| ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]]) |
| ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] |
| ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] |
| ; CHECK-NEXT: store float [[OP_EXTRA1]], float* @res, align 4 |
| ; CHECK-NEXT: ret float [[OP_EXTRA1]] |
| ; |
| ; THRESHOLD-LABEL: @bazz( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3 |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] |
| ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 |
| ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]]) |
| ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] |
| ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] |
| ; THRESHOLD-NEXT: store float [[OP_EXTRA1]], float* @res, align 4 |
| ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] |
| ; |
| entry: |
| %0 = load i32, i32* @n, align 4 |
| %mul = mul nsw i32 %0, 3 |
| %conv = sitofp i32 %mul to float |
| %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 |
| %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16 |
| %mul4 = fmul fast float %2, %1 |
| %add = fadd fast float %mul4, %conv |
| %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4 |
| %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4 |
| %mul4.1 = fmul fast float %4, %3 |
| %add.1 = fadd fast float %mul4.1, %add |
| %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 |
| %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 |
| %mul4.2 = fmul fast float %6, %5 |
| %add.2 = fadd fast float %mul4.2, %add.1 |
| %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 |
| %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 |
| %mul4.3 = fmul fast float %8, %7 |
| %add.3 = fadd fast float %mul4.3, %add.2 |
| %mul5 = shl nsw i32 %0, 2 |
| %conv6 = sitofp i32 %mul5 to float |
| %add7 = fadd fast float %add.3, %conv6 |
| %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16 |
| %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16 |
| %mul18 = fmul fast float %10, %9 |
| %add19 = fadd fast float %mul18, %add7 |
| %11 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4 |
| %12 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4 |
| %mul18.1 = fmul fast float %12, %11 |
| %add19.1 = fadd fast float %mul18.1, %add19 |
| %13 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6), align 8 |
| %14 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6), align 8 |
| %mul18.2 = fmul fast float %14, %13 |
| %add19.2 = fadd fast float %mul18.2, %add19.1 |
| %15 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 7), align 4 |
| %16 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 7), align 4 |
| %mul18.3 = fmul fast float %16, %15 |
| %add19.3 = fadd fast float %mul18.3, %add19.2 |
| store float %add19.3, float* @res, align 4 |
| ret float %add19.3 |
| } |
| |
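; 4-product fadd reduction whose result is multiplied by a scalar conversion of @n;
; vectorized via llvm.vector.reduce.fadd.v4f32.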
| define float @bazzz() { |
| ; CHECK-LABEL: @bazzz( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] |
| ; CHECK-NEXT: store float [[TMP5]], float* @res, align 4 |
| ; CHECK-NEXT: ret float [[TMP5]] |
| ; |
| ; THRESHOLD-LABEL: @bazzz( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] |
| ; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4 |
| ; THRESHOLD-NEXT: ret float [[TMP5]] |
| ; |
| entry: |
| %0 = load i32, i32* @n, align 4 |
| %conv = sitofp i32 %0 to float |
| %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 |
| %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16 |
| %mul = fmul fast float %2, %1 |
| %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4 |
| %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4 |
| %mul.1 = fmul fast float %4, %3 |
| %5 = fadd fast float %mul.1, %mul |
| %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 |
| %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 |
| %mul.2 = fmul fast float %7, %6 |
| %8 = fadd fast float %mul.2, %5 |
| %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 |
| %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 |
| %mul.3 = fmul fast float %10, %9 |
| %11 = fadd fast float %mul.3, %8 |
| %12 = fmul fast float %conv, %11 |
| store float %12, float* @res, align 4 |
| ret float %12 |
| } |
| |
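; Same reduction as in @bazzz, but the result is converted back to i32 and stored to @n.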
| define i32 @foo() { |
| ; CHECK-LABEL: @foo( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] |
| ; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 |
| ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4 |
| ; CHECK-NEXT: ret i32 [[CONV4]] |
| ; |
| ; THRESHOLD-LABEL: @foo( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] |
| ; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 |
| ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4 |
| ; THRESHOLD-NEXT: ret i32 [[CONV4]] |
| ; |
| entry: |
| %0 = load i32, i32* @n, align 4 |
| %conv = sitofp i32 %0 to float |
| %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 |
| %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16 |
| %mul = fmul fast float %2, %1 |
| %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4 |
| %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4 |
| %mul.1 = fmul fast float %4, %3 |
| %5 = fadd fast float %mul.1, %mul |
| %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 |
| %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 |
| %mul.2 = fmul fast float %7, %6 |
| %8 = fadd fast float %mul.2, %5 |
| %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 |
| %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 |
| %mul.3 = fmul fast float %10, %9 |
| %11 = fadd fast float %mul.3, %8 |
| %12 = fmul fast float %conv, %11 |
| %conv4 = fptosi float %12 to i32 |
| store i32 %conv4, i32* @n, align 4 |
| ret i32 %conv4 |
| } |
| |
| ; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select |
| ; with fastmath on the select. |
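; @bar is a max reduction built from fcmp ogt + select over four products; only the
; first pair of multiplies is vectorized. Illustrative only (not checked below): with
; fast-math on the selects, InstCombine would canonicalize each fcmp+select pair into
;   %max = call fast float @llvm.maxnum.f32(float %a, float %b)
; which the reduction matcher could then recognize and lower via
; @llvm.vector.reduce.fmax.v4f32.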
| define float @bar() { |
| ; CHECK-LABEL: @bar( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 |
| ; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] |
| ; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 |
| ; CHECK-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]] |
| ; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]] |
| ; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 |
| ; CHECK-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]] |
| ; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]] |
| ; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]] |
| ; CHECK-NEXT: store float [[MAX_0_MUL3_2]], float* @res, align 4 |
| ; CHECK-NEXT: ret float [[MAX_0_MUL3_2]] |
| ; |
| ; THRESHOLD-LABEL: @bar( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]] |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 |
| ; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] |
| ; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]] |
| ; THRESHOLD-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 |
| ; THRESHOLD-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 |
| ; THRESHOLD-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]] |
| ; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]] |
| ; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]] |
| ; THRESHOLD-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 |
| ; THRESHOLD-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 |
| ; THRESHOLD-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]] |
| ; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]] |
| ; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]] |
| ; THRESHOLD-NEXT: store float [[MAX_0_MUL3_2]], float* @res, align 4 |
| ; THRESHOLD-NEXT: ret float [[MAX_0_MUL3_2]] |
| ; |
| entry: |
| %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 |
| %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16 |
| %mul = fmul fast float %1, %0 |
| %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4 |
| %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4 |
| %mul3 = fmul fast float %3, %2 |
| %cmp4 = fcmp fast ogt float %mul, %mul3 |
| %max.0.mul3 = select i1 %cmp4, float %mul, float %mul3 |
| %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8 |
| %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8 |
| %mul3.1 = fmul fast float %5, %4 |
| %cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1 |
| %max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1 |
| %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4 |
| %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4 |
| %mul3.2 = fmul fast float %7, %6 |
| %cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2 |
| %max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2 |
| store float %max.0.mul3.2, float* @res, align 4 |
| ret float %max.0.mul3.2 |
| } |
| |
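; Plain fadd reduction over 48 consecutive loads; the loads are vectorized as a
; <16 x float> part (x[0..15]) and a <32 x float> part (x[16..47]), each reduced with
; llvm.vector.reduce.fadd and then combined with a scalar fadd.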
| define float @f(float* nocapture readonly %x) { |
| ; CHECK-LABEL: @f( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 |
| ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 |
| ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 |
| ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 |
| ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 |
| ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 |
| ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 |
| ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 |
| ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 |
| ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 |
| ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 |
| ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 |
| ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 |
| ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 |
| ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 |
| ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 |
| ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 |
| ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 |
| ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 |
| ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 |
| ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 |
| ; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 |
| ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 |
| ; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32 |
| ; CHECK-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33 |
| ; CHECK-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34 |
| ; CHECK-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35 |
| ; CHECK-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36 |
| ; CHECK-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37 |
| ; CHECK-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38 |
| ; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39 |
| ; CHECK-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40 |
| ; CHECK-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41 |
| ; CHECK-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42 |
| ; CHECK-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43 |
| ; CHECK-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44 |
| ; CHECK-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45 |
| ; CHECK-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46 |
| ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 |
| ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* |
| ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]]) |
| ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] |
| ; CHECK-NEXT: ret float [[OP_RDX]] |
| ; |
| ; THRESHOLD-LABEL: @f( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]]) |
| ; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]]) |
| ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] |
| ; THRESHOLD-NEXT: ret float [[OP_RDX]] |
| ; |
| entry: |
| %0 = load float, float* %x, align 4 |
| %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1 |
| %1 = load float, float* %arrayidx.1, align 4 |
| %add.1 = fadd fast float %1, %0 |
| %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2 |
| %2 = load float, float* %arrayidx.2, align 4 |
| %add.2 = fadd fast float %2, %add.1 |
| %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3 |
| %3 = load float, float* %arrayidx.3, align 4 |
| %add.3 = fadd fast float %3, %add.2 |
| %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4 |
| %4 = load float, float* %arrayidx.4, align 4 |
| %add.4 = fadd fast float %4, %add.3 |
| %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5 |
| %5 = load float, float* %arrayidx.5, align 4 |
| %add.5 = fadd fast float %5, %add.4 |
| %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6 |
| %6 = load float, float* %arrayidx.6, align 4 |
| %add.6 = fadd fast float %6, %add.5 |
| %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7 |
| %7 = load float, float* %arrayidx.7, align 4 |
| %add.7 = fadd fast float %7, %add.6 |
| %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8 |
| %8 = load float, float* %arrayidx.8, align 4 |
| %add.8 = fadd fast float %8, %add.7 |
| %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9 |
| %9 = load float, float* %arrayidx.9, align 4 |
| %add.9 = fadd fast float %9, %add.8 |
| %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10 |
| %10 = load float, float* %arrayidx.10, align 4 |
| %add.10 = fadd fast float %10, %add.9 |
| %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11 |
| %11 = load float, float* %arrayidx.11, align 4 |
| %add.11 = fadd fast float %11, %add.10 |
| %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12 |
| %12 = load float, float* %arrayidx.12, align 4 |
| %add.12 = fadd fast float %12, %add.11 |
| %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13 |
| %13 = load float, float* %arrayidx.13, align 4 |
| %add.13 = fadd fast float %13, %add.12 |
| %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14 |
| %14 = load float, float* %arrayidx.14, align 4 |
| %add.14 = fadd fast float %14, %add.13 |
| %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15 |
| %15 = load float, float* %arrayidx.15, align 4 |
| %add.15 = fadd fast float %15, %add.14 |
| %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16 |
| %16 = load float, float* %arrayidx.16, align 4 |
| %add.16 = fadd fast float %16, %add.15 |
| %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17 |
| %17 = load float, float* %arrayidx.17, align 4 |
| %add.17 = fadd fast float %17, %add.16 |
| %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18 |
| %18 = load float, float* %arrayidx.18, align 4 |
| %add.18 = fadd fast float %18, %add.17 |
| %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19 |
| %19 = load float, float* %arrayidx.19, align 4 |
| %add.19 = fadd fast float %19, %add.18 |
| %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20 |
| %20 = load float, float* %arrayidx.20, align 4 |
| %add.20 = fadd fast float %20, %add.19 |
| %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21 |
| %21 = load float, float* %arrayidx.21, align 4 |
| %add.21 = fadd fast float %21, %add.20 |
| %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22 |
| %22 = load float, float* %arrayidx.22, align 4 |
| %add.22 = fadd fast float %22, %add.21 |
| %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23 |
| %23 = load float, float* %arrayidx.23, align 4 |
| %add.23 = fadd fast float %23, %add.22 |
| %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24 |
| %24 = load float, float* %arrayidx.24, align 4 |
| %add.24 = fadd fast float %24, %add.23 |
| %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25 |
| %25 = load float, float* %arrayidx.25, align 4 |
| %add.25 = fadd fast float %25, %add.24 |
| %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26 |
| %26 = load float, float* %arrayidx.26, align 4 |
| %add.26 = fadd fast float %26, %add.25 |
| %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27 |
| %27 = load float, float* %arrayidx.27, align 4 |
| %add.27 = fadd fast float %27, %add.26 |
| %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28 |
| %28 = load float, float* %arrayidx.28, align 4 |
| %add.28 = fadd fast float %28, %add.27 |
| %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29 |
| %29 = load float, float* %arrayidx.29, align 4 |
| %add.29 = fadd fast float %29, %add.28 |
| %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30 |
| %30 = load float, float* %arrayidx.30, align 4 |
| %add.30 = fadd fast float %30, %add.29 |
| %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31 |
| %31 = load float, float* %arrayidx.31, align 4 |
| %add.31 = fadd fast float %31, %add.30 |
| %arrayidx.32 = getelementptr inbounds float, float* %x, i64 32 |
| %32 = load float, float* %arrayidx.32, align 4 |
| %add.32 = fadd fast float %32, %add.31 |
| %arrayidx.33 = getelementptr inbounds float, float* %x, i64 33 |
| %33 = load float, float* %arrayidx.33, align 4 |
| %add.33 = fadd fast float %33, %add.32 |
| %arrayidx.34 = getelementptr inbounds float, float* %x, i64 34 |
| %34 = load float, float* %arrayidx.34, align 4 |
| %add.34 = fadd fast float %34, %add.33 |
| %arrayidx.35 = getelementptr inbounds float, float* %x, i64 35 |
| %35 = load float, float* %arrayidx.35, align 4 |
| %add.35 = fadd fast float %35, %add.34 |
| %arrayidx.36 = getelementptr inbounds float, float* %x, i64 36 |
| %36 = load float, float* %arrayidx.36, align 4 |
| %add.36 = fadd fast float %36, %add.35 |
| %arrayidx.37 = getelementptr inbounds float, float* %x, i64 37 |
| %37 = load float, float* %arrayidx.37, align 4 |
| %add.37 = fadd fast float %37, %add.36 |
| %arrayidx.38 = getelementptr inbounds float, float* %x, i64 38 |
| %38 = load float, float* %arrayidx.38, align 4 |
| %add.38 = fadd fast float %38, %add.37 |
| %arrayidx.39 = getelementptr inbounds float, float* %x, i64 39 |
| %39 = load float, float* %arrayidx.39, align 4 |
| %add.39 = fadd fast float %39, %add.38 |
| %arrayidx.40 = getelementptr inbounds float, float* %x, i64 40 |
| %40 = load float, float* %arrayidx.40, align 4 |
| %add.40 = fadd fast float %40, %add.39 |
| %arrayidx.41 = getelementptr inbounds float, float* %x, i64 41 |
| %41 = load float, float* %arrayidx.41, align 4 |
| %add.41 = fadd fast float %41, %add.40 |
| %arrayidx.42 = getelementptr inbounds float, float* %x, i64 42 |
| %42 = load float, float* %arrayidx.42, align 4 |
| %add.42 = fadd fast float %42, %add.41 |
| %arrayidx.43 = getelementptr inbounds float, float* %x, i64 43 |
| %43 = load float, float* %arrayidx.43, align 4 |
| %add.43 = fadd fast float %43, %add.42 |
| %arrayidx.44 = getelementptr inbounds float, float* %x, i64 44 |
| %44 = load float, float* %arrayidx.44, align 4 |
| %add.44 = fadd fast float %44, %add.43 |
| %arrayidx.45 = getelementptr inbounds float, float* %x, i64 45 |
| %45 = load float, float* %arrayidx.45, align 4 |
| %add.45 = fadd fast float %45, %add.44 |
| %arrayidx.46 = getelementptr inbounds float, float* %x, i64 46 |
| %46 = load float, float* %arrayidx.46, align 4 |
| %add.46 = fadd fast float %46, %add.45 |
| %arrayidx.47 = getelementptr inbounds float, float* %x, i64 47 |
| %47 = load float, float* %arrayidx.47, align 4 |
| %add.47 = fadd fast float %47, %add.46 |
| ret float %add.47 |
| } |
| |
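; fadd reduction over 32 consecutive loads with an extra scalar operand (%conv);
; reduced via llvm.vector.reduce.fadd.v32f32 plus one scalar fadd for %conv.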
| define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { |
| ; CHECK-LABEL: @f1( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float |
| ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 |
| ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 |
| ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 |
| ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 |
| ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 |
| ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 |
| ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 |
| ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 |
| ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 |
| ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 |
| ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 |
| ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 |
| ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 |
| ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 |
| ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 |
| ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 |
| ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 |
| ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 |
| ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 |
| ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 |
| ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 |
| ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 |
| ; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 |
| ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) |
| ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] |
| ; CHECK-NEXT: ret float [[OP_EXTRA]] |
| ; |
| ; THRESHOLD-LABEL: @f1( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float |
| ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) |
| ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] |
| ; THRESHOLD-NEXT: ret float [[OP_EXTRA]] |
| ; |
| entry: |
| %rem = srem i32 %a, %b |
| %conv = sitofp i32 %rem to float |
| %0 = load float, float* %x, align 4 |
| %add = fadd fast float %0, %conv |
| %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1 |
| %1 = load float, float* %arrayidx.1, align 4 |
| %add.1 = fadd fast float %1, %add |
| %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2 |
| %2 = load float, float* %arrayidx.2, align 4 |
| %add.2 = fadd fast float %2, %add.1 |
| %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3 |
| %3 = load float, float* %arrayidx.3, align 4 |
| %add.3 = fadd fast float %3, %add.2 |
| %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4 |
| %4 = load float, float* %arrayidx.4, align 4 |
| %add.4 = fadd fast float %4, %add.3 |
| %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5 |
| %5 = load float, float* %arrayidx.5, align 4 |
| %add.5 = fadd fast float %5, %add.4 |
| %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6 |
| %6 = load float, float* %arrayidx.6, align 4 |
| %add.6 = fadd fast float %6, %add.5 |
| %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7 |
| %7 = load float, float* %arrayidx.7, align 4 |
| %add.7 = fadd fast float %7, %add.6 |
| %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8 |
| %8 = load float, float* %arrayidx.8, align 4 |
| %add.8 = fadd fast float %8, %add.7 |
| %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9 |
| %9 = load float, float* %arrayidx.9, align 4 |
| %add.9 = fadd fast float %9, %add.8 |
| %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10 |
| %10 = load float, float* %arrayidx.10, align 4 |
| %add.10 = fadd fast float %10, %add.9 |
| %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11 |
| %11 = load float, float* %arrayidx.11, align 4 |
| %add.11 = fadd fast float %11, %add.10 |
| %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12 |
| %12 = load float, float* %arrayidx.12, align 4 |
| %add.12 = fadd fast float %12, %add.11 |
| %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13 |
| %13 = load float, float* %arrayidx.13, align 4 |
| %add.13 = fadd fast float %13, %add.12 |
| %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14 |
| %14 = load float, float* %arrayidx.14, align 4 |
| %add.14 = fadd fast float %14, %add.13 |
| %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15 |
| %15 = load float, float* %arrayidx.15, align 4 |
| %add.15 = fadd fast float %15, %add.14 |
| %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16 |
| %16 = load float, float* %arrayidx.16, align 4 |
| %add.16 = fadd fast float %16, %add.15 |
| %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17 |
| %17 = load float, float* %arrayidx.17, align 4 |
| %add.17 = fadd fast float %17, %add.16 |
| %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18 |
| %18 = load float, float* %arrayidx.18, align 4 |
| %add.18 = fadd fast float %18, %add.17 |
| %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19 |
| %19 = load float, float* %arrayidx.19, align 4 |
| %add.19 = fadd fast float %19, %add.18 |
| %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20 |
| %20 = load float, float* %arrayidx.20, align 4 |
| %add.20 = fadd fast float %20, %add.19 |
| %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21 |
| %21 = load float, float* %arrayidx.21, align 4 |
| %add.21 = fadd fast float %21, %add.20 |
| %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22 |
| %22 = load float, float* %arrayidx.22, align 4 |
| %add.22 = fadd fast float %22, %add.21 |
| %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23 |
| %23 = load float, float* %arrayidx.23, align 4 |
| %add.23 = fadd fast float %23, %add.22 |
| %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24 |
| %24 = load float, float* %arrayidx.24, align 4 |
| %add.24 = fadd fast float %24, %add.23 |
| %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25 |
| %25 = load float, float* %arrayidx.25, align 4 |
| %add.25 = fadd fast float %25, %add.24 |
| %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26 |
| %26 = load float, float* %arrayidx.26, align 4 |
| %add.26 = fadd fast float %26, %add.25 |
| %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27 |
| %27 = load float, float* %arrayidx.27, align 4 |
| %add.27 = fadd fast float %27, %add.26 |
| %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28 |
| %28 = load float, float* %arrayidx.28, align 4 |
| %add.28 = fadd fast float %28, %add.27 |
| %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29 |
| %29 = load float, float* %arrayidx.29, align 4 |
| %add.29 = fadd fast float %29, %add.28 |
| %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30 |
| %30 = load float, float* %arrayidx.30, align 4 |
| %add.30 = fadd fast float %30, %add.29 |
| %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31 |
| %31 = load float, float* %arrayidx.31, align 4 |
| %add.31 = fadd fast float %31, %add.30 |
| ret float %add.31 |
| } |
| |
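; fadd reduction over consecutive loads starting at x[1]; the loads are vectorized as
; <4 x float>, <8 x float> and <16 x float> parts whose reductions are combined with
; scalar fadds, plus scalar loads of x[1] and x[2].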
| define float @loadadd31(float* nocapture readonly %x) { |
| ; CHECK-LABEL: @loadadd31( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 |
| ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 |
| ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* |
| ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 |
| ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 |
| ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 |
| ; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 |
| ; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 |
| ; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 |
| ; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 |
| ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 |
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* |
| ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 |
| ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 |
| ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 |
| ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 |
| ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 |
| ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 |
| ; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 |
| ; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 |
| ; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 |
| ; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 |
| ; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 |
| ; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 |
| ; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 |
| ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 |
| ; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 |
| ; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 |
| ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 |
| ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* |
| ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 |
| ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]]) |
| ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]]) |
| ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] |
| ; CHECK-NEXT: ret float [[TMP12]] |
| ; |
| ; THRESHOLD-LABEL: @loadadd31( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* |
| ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 |
| ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 |
| ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* |
| ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 |
| ; THRESHOLD-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]]) |
| ; THRESHOLD-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]]) |
| ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] |
| ; THRESHOLD-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) |
| ; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] |
| ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]] |
| ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] |
| ; THRESHOLD-NEXT: ret float [[TMP12]] |
| ; |
| entry: |
| %arrayidx = getelementptr inbounds float, float* %x, i64 1 |
| %0 = load float, float* %arrayidx, align 4 |
| %arrayidx.1 = getelementptr inbounds float, float* %x, i64 2 |
| %1 = load float, float* %arrayidx.1, align 4 |
| %add.1 = fadd fast float %1, %0 |
| %arrayidx.2 = getelementptr inbounds float, float* %x, i64 3 |
| %2 = load float, float* %arrayidx.2, align 4 |
| %add.2 = fadd fast float %2, %add.1 |
| %arrayidx.3 = getelementptr inbounds float, float* %x, i64 4 |
| %3 = load float, float* %arrayidx.3, align 4 |
| %add.3 = fadd fast float %3, %add.2 |
| %arrayidx.4 = getelementptr inbounds float, float* %x, i64 5 |
| %4 = load float, float* %arrayidx.4, align 4 |
| %add.4 = fadd fast float %4, %add.3 |
| %arrayidx.5 = getelementptr inbounds float, float* %x, i64 6 |
| %5 = load float, float* %arrayidx.5, align 4 |
| %add.5 = fadd fast float %5, %add.4 |
| %arrayidx.6 = getelementptr inbounds float, float* %x, i64 7 |
| %6 = load float, float* %arrayidx.6, align 4 |
| %add.6 = fadd fast float %6, %add.5 |
| %arrayidx.7 = getelementptr inbounds float, float* %x, i64 8 |
| %7 = load float, float* %arrayidx.7, align 4 |
| %add.7 = fadd fast float %7, %add.6 |
| %arrayidx.8 = getelementptr inbounds float, float* %x, i64 9 |
| %8 = load float, float* %arrayidx.8, align 4 |
| %add.8 = fadd fast float %8, %add.7 |
| %arrayidx.9 = getelementptr inbounds float, float* %x, i64 10 |
| %9 = load float, float* %arrayidx.9, align 4 |
| %add.9 = fadd fast float %9, %add.8 |
| %arrayidx.10 = getelementptr inbounds float, float* %x, i64 11 |
| %10 = load float, float* %arrayidx.10, align 4 |
| %add.10 = fadd fast float %10, %add.9 |
| %arrayidx.11 = getelementptr inbounds float, float* %x, i64 12 |
| %11 = load float, float* %arrayidx.11, align 4 |
| %add.11 = fadd fast float %11, %add.10 |
| %arrayidx.12 = getelementptr inbounds float, float* %x, i64 13 |
| %12 = load float, float* %arrayidx.12, align 4 |
| %add.12 = fadd fast float %12, %add.11 |
| %arrayidx.13 = getelementptr inbounds float, float* %x, i64 14 |
| %13 = load float, float* %arrayidx.13, align 4 |
| %add.13 = fadd fast float %13, %add.12 |
| %arrayidx.14 = getelementptr inbounds float, float* %x, i64 15 |
| %14 = load float, float* %arrayidx.14, align 4 |
| %add.14 = fadd fast float %14, %add.13 |
| %arrayidx.15 = getelementptr inbounds float, float* %x, i64 16 |
| %15 = load float, float* %arrayidx.15, align 4 |
| %add.15 = fadd fast float %15, %add.14 |
| %arrayidx.16 = getelementptr inbounds float, float* %x, i64 17 |
| %16 = load float, float* %arrayidx.16, align 4 |
| %add.16 = fadd fast float %16, %add.15 |
| %arrayidx.17 = getelementptr inbounds float, float* %x, i64 18 |
| %17 = load float, float* %arrayidx.17, align 4 |
| %add.17 = fadd fast float %17, %add.16 |
| %arrayidx.18 = getelementptr inbounds float, float* %x, i64 19 |
| %18 = load float, float* %arrayidx.18, align 4 |
| %add.18 = fadd fast float %18, %add.17 |
| %arrayidx.19 = getelementptr inbounds float, float* %x, i64 20 |
| %19 = load float, float* %arrayidx.19, align 4 |
| %add.19 = fadd fast float %19, %add.18 |
| %arrayidx.20 = getelementptr inbounds float, float* %x, i64 21 |
| %20 = load float, float* %arrayidx.20, align 4 |
| %add.20 = fadd fast float %20, %add.19 |
| %arrayidx.21 = getelementptr inbounds float, float* %x, i64 22 |
| %21 = load float, float* %arrayidx.21, align 4 |
| %add.21 = fadd fast float %21, %add.20 |
| %arrayidx.22 = getelementptr inbounds float, float* %x, i64 23 |
| %22 = load float, float* %arrayidx.22, align 4 |
| %add.22 = fadd fast float %22, %add.21 |
| %arrayidx.23 = getelementptr inbounds float, float* %x, i64 24 |
| %23 = load float, float* %arrayidx.23, align 4 |
| %add.23 = fadd fast float %23, %add.22 |
| %arrayidx.24 = getelementptr inbounds float, float* %x, i64 25 |
| %24 = load float, float* %arrayidx.24, align 4 |
| %add.24 = fadd fast float %24, %add.23 |
| %arrayidx.25 = getelementptr inbounds float, float* %x, i64 26 |
| %25 = load float, float* %arrayidx.25, align 4 |
| %add.25 = fadd fast float %25, %add.24 |
| %arrayidx.26 = getelementptr inbounds float, float* %x, i64 27 |
| %26 = load float, float* %arrayidx.26, align 4 |
| %add.26 = fadd fast float %26, %add.25 |
| %arrayidx.27 = getelementptr inbounds float, float* %x, i64 28 |
| %27 = load float, float* %arrayidx.27, align 4 |
| %add.27 = fadd fast float %27, %add.26 |
| %arrayidx.28 = getelementptr inbounds float, float* %x, i64 29 |
| %28 = load float, float* %arrayidx.28, align 4 |
| %add.28 = fadd fast float %28, %add.27 |
| %arrayidx.29 = getelementptr inbounds float, float* %x, i64 30 |
| %29 = load float, float* %arrayidx.29, align 4 |
| %add.29 = fadd fast float %29, %add.28 |
| ret float %add.29 |
| } |
| |
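| ; Reduction of x[0..7] with extra (non-load) operands: the constant 3.0 and %conv, which also reappears at the end of the chain. |
| ; Expected: one 8-wide fadd reduction, with the extra operands folded in as scalar fadds afterwards. |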
| define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { |
| ; CHECK-LABEL: @extra_args( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 |
| ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) |
| ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] |
| ; CHECK-NEXT: ret float [[OP_EXTRA1]] |
| ; |
| ; THRESHOLD-LABEL: @extra_args( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) |
| ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] |
| ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] |
| ; |
| entry: |
| %mul = mul nsw i32 %b, %a |
| %conv = sitofp i32 %mul to float |
| %0 = load float, float* %x, align 4 |
| %add = fadd fast float %conv, 3.000000e+00 |
| %add1 = fadd fast float %0, %add |
| %arrayidx3 = getelementptr inbounds float, float* %x, i64 1 |
| %1 = load float, float* %arrayidx3, align 4 |
| %add4 = fadd fast float %1, %add1 |
| %add5 = fadd fast float %add4, %conv |
| %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2 |
| %2 = load float, float* %arrayidx3.1, align 4 |
| %add4.1 = fadd fast float %2, %add5 |
| %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3 |
| %3 = load float, float* %arrayidx3.2, align 4 |
| %add4.2 = fadd fast float %3, %add4.1 |
| %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4 |
| %4 = load float, float* %arrayidx3.3, align 4 |
| %add4.3 = fadd fast float %4, %add4.2 |
| %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5 |
| %5 = load float, float* %arrayidx3.4, align 4 |
| %add4.4 = fadd fast float %5, %add4.3 |
| %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6 |
| %6 = load float, float* %arrayidx3.5, align 4 |
| %add4.5 = fadd fast float %6, %add4.4 |
| %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7 |
| %7 = load float, float* %arrayidx3.6, align 4 |
| %add4.6 = fadd fast float %7, %add4.5 |
| ret float %add4.6 |
| } |
| |
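| ; Same pattern as @extra_args, but the extra constant 5.0 occurs twice in the chain. |
| ; Expected: both occurrences survive as separate scalar fadds after the vector reduction. |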
| define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a, i32 %b) { |
| ; CHECK-LABEL: @extra_args_same_several_times( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 |
| ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) |
| ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 |
| ; CHECK-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00 |
| ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]] |
| ; CHECK-NEXT: ret float [[OP_EXTRA3]] |
| ; |
| ; THRESHOLD-LABEL: @extra_args_same_several_times( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) |
| ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 |
| ; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00 |
| ; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]] |
| ; THRESHOLD-NEXT: ret float [[OP_EXTRA3]] |
| ; |
| entry: |
| %mul = mul nsw i32 %b, %a |
| %conv = sitofp i32 %mul to float |
| %0 = load float, float* %x, align 4 |
| %add = fadd fast float %conv, 3.000000e+00 |
| %add1 = fadd fast float %0, %add |
| %arrayidx3 = getelementptr inbounds float, float* %x, i64 1 |
| %1 = load float, float* %arrayidx3, align 4 |
| %add4 = fadd fast float %1, %add1 |
| %add41 = fadd fast float %add4, 5.000000e+00 |
| %add5 = fadd fast float %add41, %conv |
| %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2 |
| %2 = load float, float* %arrayidx3.1, align 4 |
| %add4.1 = fadd fast float %2, %add5 |
| %add4.11 = fadd fast float %add4.1, 5.000000e+00 |
| %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3 |
| %3 = load float, float* %arrayidx3.2, align 4 |
| %add4.2 = fadd fast float %3, %add4.11 |
| %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4 |
| %4 = load float, float* %arrayidx3.3, align 4 |
| %add4.3 = fadd fast float %4, %add4.2 |
| %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5 |
| %5 = load float, float* %arrayidx3.4, align 4 |
| %add4.4 = fadd fast float %5, %add4.3 |
| %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6 |
| %6 = load float, float* %arrayidx3.5, align 4 |
| %add4.5 = fadd fast float %6, %add4.4 |
| %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7 |
| %7 = load float, float* %arrayidx3.6, align 4 |
| %add4.6 = fadd fast float %7, %add4.5 |
| ret float %add4.6 |
| } |
| |
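| ; Reduction of x[0..7] where an extra operand (%conv) is injected in the middle of the chain and %addc depends on %c. |
| ; Expected: the 8-wide reduction is still formed, with the extra operands added back as scalars. |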
| define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) { |
| ; CHECK-LABEL: @extra_args_no_replace( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] |
| ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float |
| ; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00 |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]] |
| ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; CHECK-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; CHECK-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) |
| ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] |
| ; CHECK-NEXT: ret float [[OP_EXTRA1]] |
| ; |
| ; THRESHOLD-LABEL: @extra_args_no_replace( |
| ; THRESHOLD-NEXT: entry: |
| ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] |
| ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float |
| ; THRESHOLD-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float |
| ; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00 |
| ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]] |
| ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) |
| ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] |
| ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] |
| ; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] |
| ; |
| entry: |
| %mul = mul nsw i32 %b, %a |
| %conv = sitofp i32 %mul to float |
| %0 = load float, float* %x, align 4 |
| %convc = sitofp i32 %c to float |
| %addc = fadd fast float %convc, 3.000000e+00 |
| %add = fadd fast float %conv, %addc |
| %add1 = fadd fast float %0, %add |
| %arrayidx3 = getelementptr inbounds float, float* %x, i64 1 |
| %1 = load float, float* %arrayidx3, align 4 |
| %add4 = fadd fast float %1, %add1 |
| %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2 |
| %2 = load float, float* %arrayidx3.1, align 4 |
| %add4.1 = fadd fast float %2, %add4 |
| %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3 |
| %3 = load float, float* %arrayidx3.2, align 4 |
| %add4.2 = fadd fast float %3, %add4.1 |
| %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4 |
| %4 = load float, float* %arrayidx3.3, align 4 |
| %add4.3 = fadd fast float %4, %add4.2 |
| %add5 = fadd fast float %add4.3, %conv |
| %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5 |
| %5 = load float, float* %arrayidx3.4, align 4 |
| %add4.4 = fadd fast float %5, %add5 |
| %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6 |
| %6 = load float, float* %arrayidx3.5, align 4 |
| %add4.5 = fadd fast float %6, %add4.4 |
| %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7 |
| %7 = load float, float* %arrayidx3.6, align 4 |
| %add4.6 = fadd fast float %7, %add4.5 |
| ret float %add4.6 |
| } |
| |
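| ; One fadd in the chain (%add4.1) lacks the 'fast' flag, so the chain is not a reduction candidate. |
| ; Expected: the scalar loads and adds are kept as-is. |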
| define float @extra_args_no_fast(float* %x, float %a, float %b) { |
| ; CHECK-LABEL: @extra_args_no_fast( |
| ; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00 |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]] |
| ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; CHECK-NEXT: [[T0:%.*]] = load float, float* [[X]], align 4 |
| ; CHECK-NEXT: [[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4 |
| ; CHECK-NEXT: [[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4 |
| ; CHECK-NEXT: [[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4 |
| ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]] |
| ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]] |
| ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]] |
| ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]] |
| ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]] |
| ; CHECK-NEXT: ret float [[ADD5]] |
| ; |
| ; THRESHOLD-LABEL: @extra_args_no_fast( |
| ; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00 |
| ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]] |
| ; THRESHOLD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 |
| ; THRESHOLD-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 |
| ; THRESHOLD-NEXT: [[T0:%.*]] = load float, float* [[X]], align 4 |
| ; THRESHOLD-NEXT: [[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4 |
| ; THRESHOLD-NEXT: [[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4 |
| ; THRESHOLD-NEXT: [[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4 |
| ; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]] |
| ; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]] |
| ; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]] |
| ; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]] |
| ; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]] |
| ; THRESHOLD-NEXT: ret float [[ADD5]] |
| ; |
| %addc = fadd fast float %b, 3.0 |
| %add = fadd fast float %a, %addc |
| %arrayidx3 = getelementptr inbounds float, float* %x, i64 1 |
| %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2 |
| %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3 |
| %t0 = load float, float* %x, align 4 |
| %t1 = load float, float* %arrayidx3, align 4 |
| %t2 = load float, float* %arrayidx3.1, align 4 |
| %t3 = load float, float* %arrayidx3.2, align 4 |
| %add1 = fadd fast float %t0, %add |
| %add4 = fadd fast float %t1, %add1 |
| %add4.1 = fadd float %t2, %add4 ; this is not a reduction candidate |
| %add4.2 = fadd fast float %t3, %add4.1 |
| %add5 = fadd fast float %add4.2, %a |
| ret float %add5 |
| } |
| |
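| ; Integer variant: four identical sext(icmp eq (xor %arg, %bar), 0) values are summed together with %arg and the last xor. |
| ; Expected: a 4-wide xor/icmp/sext plus an add reduction, with %arg and the extracted xor added back as scalars. |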
| define i32 @wobble(i32 %arg, i32 %bar) { |
| ; CHECK-LABEL: @wobble( |
| ; CHECK-NEXT: bb: |
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3 |
| ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 |
| ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer |
| ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) |
| ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] |
| ; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] |
| ; CHECK-NEXT: ret i32 [[OP_EXTRA1]] |
| ; |
| ; THRESHOLD-LABEL: @wobble( |
| ; THRESHOLD-NEXT: bb: |
| ; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0 |
| ; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1 |
| ; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2 |
| ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3 |
| ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 |
| ; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1 |
| ; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2 |
| ; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3 |
| ; THRESHOLD-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]] |
| ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 |
| ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer |
| ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> |
| ; THRESHOLD-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) |
| ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] |
| ; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] |
| ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA1]] |
| ; |
| bb: |
| %x1 = xor i32 %arg, %bar |
| %i1 = icmp eq i32 %x1, 0 |
| %s1 = sext i1 %i1 to i32 |
| %x2 = xor i32 %arg, %bar |
| %i2 = icmp eq i32 %x2, 0 |
| %s2 = sext i1 %i2 to i32 |
| %x3 = xor i32 %arg, %bar |
| %i3 = icmp eq i32 %x3, 0 |
| %s3 = sext i1 %i3 to i32 |
| %x4 = xor i32 %arg, %bar |
| %i4 = icmp eq i32 %x4, 0 |
| %s4 = sext i1 %i4 to i32 |
| %r1 = add nuw i32 %arg, %s1 |
| %r2 = add nsw i32 %r1, %s2 |
| %r3 = add nsw i32 %r2, %s3 |
| %r4 = add nsw i32 %r3, %s4 |
| %r5 = add nsw i32 %r4, %x4 |
| ret i32 %r5 |
| } |
| |