; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \
; RUN: | FileCheck %s

; Test vectorization and reassociation of fmin/fmax operations. Vectorization
; is more profitable if the loads are also vectorizable.

define double @fmin_double_4_nums_seq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmin_double_4_nums_seq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP1]])
; CHECK-NEXT: ret double [[TMP2]]
;
  ; Four loads from consecutive elements feeding a minnum chain: SLP is
  ; expected to form a single <4 x double> load plus a fmin reduction.
  %g1 = getelementptr inbounds double, ptr %x, i64 1
  %g2 = getelementptr inbounds double, ptr %x, i64 2
  %g3 = getelementptr inbounds double, ptr %x, i64 3
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
  ret double %m3
}

define double @fmin_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmin_double_16_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
; CHECK-NEXT: [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
; CHECK-NEXT: [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
; CHECK-NEXT: [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
; CHECK-NEXT: [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[X]], align 4
; CHECK-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4
; CHECK-NEXT: [[T2:%.*]] = load double, ptr [[G2]], align 4
; CHECK-NEXT: [[T3:%.*]] = load double, ptr [[G3]], align 4
; CHECK-NEXT: [[T4:%.*]] = load double, ptr [[G4]], align 4
; CHECK-NEXT: [[T5:%.*]] = load double, ptr [[G5]], align 4
; CHECK-NEXT: [[T6:%.*]] = load double, ptr [[G6]], align 4
; CHECK-NEXT: [[T7:%.*]] = load double, ptr [[G7]], align 4
; CHECK-NEXT: [[T8:%.*]] = load double, ptr [[G8]], align 4
; CHECK-NEXT: [[T9:%.*]] = load double, ptr [[G9]], align 4
; CHECK-NEXT: [[T10:%.*]] = load double, ptr [[G10]], align 4
; CHECK-NEXT: [[T11:%.*]] = load double, ptr [[G11]], align 4
; CHECK-NEXT: [[T12:%.*]] = load double, ptr [[G12]], align 4
; CHECK-NEXT: [[T13:%.*]] = load double, ptr [[G13]], align 4
; CHECK-NEXT: [[T14:%.*]] = load double, ptr [[G14]], align 4
; CHECK-NEXT: [[T15:%.*]] = load double, ptr [[G15]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
; CHECK-NEXT: [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> [[TMP16]])
; CHECK-NEXT: ret double [[TMP17]]
;
  ; Non-consecutive (stride-2) loads: the scalar loads stay, but the 16-wide
  ; minnum chain is still reassociated into a single fmin reduction.
  %g1 = getelementptr inbounds double, ptr %x, i64 2
  %g2 = getelementptr inbounds double, ptr %x, i64 4
  %g3 = getelementptr inbounds double, ptr %x, i64 6
  %g4 = getelementptr inbounds double, ptr %x, i64 8
  %g5 = getelementptr inbounds double, ptr %x, i64 10
  %g6 = getelementptr inbounds double, ptr %x, i64 12
  %g7 = getelementptr inbounds double, ptr %x, i64 14
  %g8 = getelementptr inbounds double, ptr %x, i64 16
  %g9 = getelementptr inbounds double, ptr %x, i64 18
  %g10 = getelementptr inbounds double, ptr %x, i64 20
  %g11 = getelementptr inbounds double, ptr %x, i64 22
  %g12 = getelementptr inbounds double, ptr %x, i64 24
  %g13 = getelementptr inbounds double, ptr %x, i64 26
  %g14 = getelementptr inbounds double, ptr %x, i64 28
  %g15 = getelementptr inbounds double, ptr %x, i64 30
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %t4 = load double, ptr %g4, align 4
  %t5 = load double, ptr %g5, align 4
  %t6 = load double, ptr %g6, align 4
  %t7 = load double, ptr %g7, align 4
  %t8 = load double, ptr %g8, align 4
  %t9 = load double, ptr %g9, align 4
  %t10 = load double, ptr %g10, align 4
  %t11 = load double, ptr %g11, align 4
  %t12 = load double, ptr %g12, align 4
  %t13 = load double, ptr %g13, align 4
  %t14 = load double, ptr %g14, align 4
  %t15 = load double, ptr %g15, align 4
  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
  %m4 = tail call fast double @llvm.minnum.f64(double %t4, double %m3)
  %m5 = tail call fast double @llvm.minnum.f64(double %t5, double %m4)
  %m6 = tail call fast double @llvm.minnum.f64(double %t6, double %m5)
  %m7 = tail call fast double @llvm.minnum.f64(double %t7, double %m6)
  %m8 = tail call fast double @llvm.minnum.f64(double %t8, double %m7)
  %m9 = tail call fast double @llvm.minnum.f64(double %t9, double %m8)
  %m10 = tail call fast double @llvm.minnum.f64(double %t10, double %m9)
  %m11 = tail call fast double @llvm.minnum.f64(double %t11, double %m10)
  %m12 = tail call fast double @llvm.minnum.f64(double %t12, double %m11)
  %m13 = tail call fast double @llvm.minnum.f64(double %t13, double %m12)
  %m14 = tail call fast double @llvm.minnum.f64(double %t14, double %m13)
  %m15 = tail call fast double @llvm.minnum.f64(double %t15, double %m14)
  ret double %m15
}

define float @fmin_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define float @fmin_float_12_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT: [[T4:%.*]] = load float, ptr [[G4]], align 4
; CHECK-NEXT: [[T5:%.*]] = load float, ptr [[G5]], align 4
; CHECK-NEXT: [[T6:%.*]] = load float, ptr [[G6]], align 4
; CHECK-NEXT: [[T7:%.*]] = load float, ptr [[G7]], align 4
; CHECK-NEXT: [[T8:%.*]] = load float, ptr [[G8]], align 4
; CHECK-NEXT: [[T9:%.*]] = load float, ptr [[G9]], align 4
; CHECK-NEXT: [[T10:%.*]] = load float, ptr [[G10]], align 4
; CHECK-NEXT: [[T11:%.*]] = load float, ptr [[G11]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.v12f32(<12 x float> [[TMP12]])
; CHECK-NEXT: ret float [[TMP13]]
;
  ; Non-power-of-two (12-wide) float variant with stride-2 loads; the minnum
  ; chain is reassociated into a v12f32 fmin reduction.
  %g1 = getelementptr inbounds float, ptr %x, i64 2
  %g2 = getelementptr inbounds float, ptr %x, i64 4
  %g3 = getelementptr inbounds float, ptr %x, i64 6
  %g4 = getelementptr inbounds float, ptr %x, i64 8
  %g5 = getelementptr inbounds float, ptr %x, i64 10
  %g6 = getelementptr inbounds float, ptr %x, i64 12
  %g7 = getelementptr inbounds float, ptr %x, i64 14
  %g8 = getelementptr inbounds float, ptr %x, i64 16
  %g9 = getelementptr inbounds float, ptr %x, i64 18
  %g10 = getelementptr inbounds float, ptr %x, i64 20
  %g11 = getelementptr inbounds float, ptr %x, i64 22
  %t0 = load float, ptr %x, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %t8 = load float, ptr %g8, align 4
  %t9 = load float, ptr %g9, align 4
  %t10 = load float, ptr %g10, align 4
  %t11 = load float, ptr %g11, align 4
  %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0)
  %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1)
  %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2)
  %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3)
  %m5 = tail call fast float @llvm.minnum.f32(float %t5, float %m4)
  %m6 = tail call fast float @llvm.minnum.f32(float %t6, float %m5)
  %m7 = tail call fast float @llvm.minnum.f32(float %t7, float %m6)
  %m8 = tail call fast float @llvm.minnum.f32(float %t8, float %m7)
  %m9 = tail call fast float @llvm.minnum.f32(float %t9, float %m8)
  %m10 = tail call fast float @llvm.minnum.f32(float %t10, float %m9)
  %m11 = tail call fast float @llvm.minnum.f32(float %t11, float %m10)
  ret float %m11
}

define double @fmax_double_4_nums_seq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmax_double_4_nums_seq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP1]])
; CHECK-NEXT: ret double [[TMP2]]
;
  ; fmax counterpart of fmin_double_4_nums_seq: consecutive loads are
  ; vectorized into one <4 x double> load plus a fmax reduction.
  %g1 = getelementptr inbounds double, ptr %x, i64 1
  %g2 = getelementptr inbounds double, ptr %x, i64 2
  %g3 = getelementptr inbounds double, ptr %x, i64 3
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
  ret double %m3
}

define double @fmax_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmax_double_16_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
; CHECK-NEXT: [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
; CHECK-NEXT: [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
; CHECK-NEXT: [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
; CHECK-NEXT: [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[X]], align 4
; CHECK-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4
; CHECK-NEXT: [[T2:%.*]] = load double, ptr [[G2]], align 4
; CHECK-NEXT: [[T3:%.*]] = load double, ptr [[G3]], align 4
; CHECK-NEXT: [[T4:%.*]] = load double, ptr [[G4]], align 4
; CHECK-NEXT: [[T5:%.*]] = load double, ptr [[G5]], align 4
; CHECK-NEXT: [[T6:%.*]] = load double, ptr [[G6]], align 4
; CHECK-NEXT: [[T7:%.*]] = load double, ptr [[G7]], align 4
; CHECK-NEXT: [[T8:%.*]] = load double, ptr [[G8]], align 4
; CHECK-NEXT: [[T9:%.*]] = load double, ptr [[G9]], align 4
; CHECK-NEXT: [[T10:%.*]] = load double, ptr [[G10]], align 4
; CHECK-NEXT: [[T11:%.*]] = load double, ptr [[G11]], align 4
; CHECK-NEXT: [[T12:%.*]] = load double, ptr [[G12]], align 4
; CHECK-NEXT: [[T13:%.*]] = load double, ptr [[G13]], align 4
; CHECK-NEXT: [[T14:%.*]] = load double, ptr [[G14]], align 4
; CHECK-NEXT: [[T15:%.*]] = load double, ptr [[G15]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
; CHECK-NEXT: [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> [[TMP16]])
; CHECK-NEXT: ret double [[TMP17]]
;
  ; fmax counterpart of fmin_double_16_nums_nonseq: stride-2 loads stay
  ; scalar, but the 16-wide maxnum chain becomes a fmax reduction.
  %g1 = getelementptr inbounds double, ptr %x, i64 2
  %g2 = getelementptr inbounds double, ptr %x, i64 4
  %g3 = getelementptr inbounds double, ptr %x, i64 6
  %g4 = getelementptr inbounds double, ptr %x, i64 8
  %g5 = getelementptr inbounds double, ptr %x, i64 10
  %g6 = getelementptr inbounds double, ptr %x, i64 12
  %g7 = getelementptr inbounds double, ptr %x, i64 14
  %g8 = getelementptr inbounds double, ptr %x, i64 16
  %g9 = getelementptr inbounds double, ptr %x, i64 18
  %g10 = getelementptr inbounds double, ptr %x, i64 20
  %g11 = getelementptr inbounds double, ptr %x, i64 22
  %g12 = getelementptr inbounds double, ptr %x, i64 24
  %g13 = getelementptr inbounds double, ptr %x, i64 26
  %g14 = getelementptr inbounds double, ptr %x, i64 28
  %g15 = getelementptr inbounds double, ptr %x, i64 30
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %t4 = load double, ptr %g4, align 4
  %t5 = load double, ptr %g5, align 4
  %t6 = load double, ptr %g6, align 4
  %t7 = load double, ptr %g7, align 4
  %t8 = load double, ptr %g8, align 4
  %t9 = load double, ptr %g9, align 4
  %t10 = load double, ptr %g10, align 4
  %t11 = load double, ptr %g11, align 4
  %t12 = load double, ptr %g12, align 4
  %t13 = load double, ptr %g13, align 4
  %t14 = load double, ptr %g14, align 4
  %t15 = load double, ptr %g15, align 4
  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
  %m4 = tail call fast double @llvm.maxnum.f64(double %t4, double %m3)
  %m5 = tail call fast double @llvm.maxnum.f64(double %t5, double %m4)
  %m6 = tail call fast double @llvm.maxnum.f64(double %t6, double %m5)
  %m7 = tail call fast double @llvm.maxnum.f64(double %t7, double %m6)
  %m8 = tail call fast double @llvm.maxnum.f64(double %t8, double %m7)
  %m9 = tail call fast double @llvm.maxnum.f64(double %t9, double %m8)
  %m10 = tail call fast double @llvm.maxnum.f64(double %t10, double %m9)
  %m11 = tail call fast double @llvm.maxnum.f64(double %t11, double %m10)
  %m12 = tail call fast double @llvm.maxnum.f64(double %t12, double %m11)
  %m13 = tail call fast double @llvm.maxnum.f64(double %t13, double %m12)
  %m14 = tail call fast double @llvm.maxnum.f64(double %t14, double %m13)
  %m15 = tail call fast double @llvm.maxnum.f64(double %t15, double %m14)
  ret double %m15
}

define float @fmax_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define float @fmax_float_12_nums_nonseq(
; CHECK-SAME: ptr noundef readonly captures(none) [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
; CHECK-NEXT: [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
; CHECK-NEXT: [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
; CHECK-NEXT: [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
; CHECK-NEXT: [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
; CHECK-NEXT: [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT: [[T4:%.*]] = load float, ptr [[G4]], align 4
; CHECK-NEXT: [[T5:%.*]] = load float, ptr [[G5]], align 4
; CHECK-NEXT: [[T6:%.*]] = load float, ptr [[G6]], align 4
; CHECK-NEXT: [[T7:%.*]] = load float, ptr [[G7]], align 4
; CHECK-NEXT: [[T8:%.*]] = load float, ptr [[G8]], align 4
; CHECK-NEXT: [[T9:%.*]] = load float, ptr [[G9]], align 4
; CHECK-NEXT: [[T10:%.*]] = load float, ptr [[G10]], align 4
; CHECK-NEXT: [[T11:%.*]] = load float, ptr [[G11]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmax.v12f32(<12 x float> [[TMP12]])
; CHECK-NEXT: ret float [[TMP13]]
;
  ; fmax counterpart of fmin_float_12_nums_nonseq: 12-wide maxnum chain over
  ; stride-2 float loads is turned into a v12f32 fmax reduction.
  %g1 = getelementptr inbounds float, ptr %x, i64 2
  %g2 = getelementptr inbounds float, ptr %x, i64 4
  %g3 = getelementptr inbounds float, ptr %x, i64 6
  %g4 = getelementptr inbounds float, ptr %x, i64 8
  %g5 = getelementptr inbounds float, ptr %x, i64 10
  %g6 = getelementptr inbounds float, ptr %x, i64 12
  %g7 = getelementptr inbounds float, ptr %x, i64 14
  %g8 = getelementptr inbounds float, ptr %x, i64 16
  %g9 = getelementptr inbounds float, ptr %x, i64 18
  %g10 = getelementptr inbounds float, ptr %x, i64 20
  %g11 = getelementptr inbounds float, ptr %x, i64 22
  %t0 = load float, ptr %x, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %t8 = load float, ptr %g8, align 4
  %t9 = load float, ptr %g9, align 4
  %t10 = load float, ptr %g10, align 4
  %t11 = load float, ptr %g11, align 4
  %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0)
  %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1)
  %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2)
  %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3)
  %m5 = tail call fast float @llvm.maxnum.f32(float %t5, float %m4)
  %m6 = tail call fast float @llvm.maxnum.f32(float %t6, float %m5)
  %m7 = tail call fast float @llvm.maxnum.f32(float %t7, float %m6)
  %m8 = tail call fast float @llvm.maxnum.f32(float %t8, float %m7)
  %m9 = tail call fast float @llvm.maxnum.f32(float %t9, float %m8)
  %m10 = tail call fast float @llvm.maxnum.f32(float %t10, float %m9)
  %m11 = tail call fast float @llvm.maxnum.f32(float %t11, float %m10)
  ret float %m11
}

; Declarations for the minnum/maxnum intrinsics used above.
declare float @llvm.minnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)