| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py |
| ; RUN: opt -passes='print<cost-model>' -disable-output -mtriple=s390x-unknown-linux \ |
| ; RUN: -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15 |
| |
| ; Queries the cost model for llvm.vector.reduce.fadd calls that carry no |
| ; fast-math flags, over <4 x float>, <8 x float>, <2 x double>, <4 x double> |
| ; and <4 x fp128> operands. The call results are unused; the test only checks |
| ; the cost line printed for each instruction. |
| define void @fadd_reductions() { |
| ; Z15-LABEL: 'fadd_reductions' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) |
| %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) |
| %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) |
| %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) |
| %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) |
| ret void |
| } |
| |
| ; Same operand types as @fadd_reductions, but every llvm.vector.reduce.fadd |
| ; call carries the `fast` flag. The expected costs below are lower than the |
| ; ones checked for the unflagged calls. |
| ; NOTE(review): the %src and %dst parameters are never referenced in the body. |
| define void @fast_fadd_reductions(ptr %src, ptr %dst) { |
| ; Z15-LABEL: 'fast_fadd_reductions' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) |
| %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) |
| %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) |
| %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) |
| %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.fmul calls that carry no |
| ; fast-math flags, over <4 x float>, <8 x float>, <2 x double>, <4 x double> |
| ; and <4 x fp128> operands. The expected costs are the same numbers that are |
| ; checked for the corresponding fadd reductions in @fadd_reductions. |
| define void @fmul_reductions() { |
| ; Z15-LABEL: 'fmul_reductions' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef) |
| %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef) |
| %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef) |
| %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef) |
| %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.fmul calls that carry the |
| ; `fast` flag, over <4 x float>, <8 x float>, <2 x double>, <4 x double> and |
| ; <4 x fp128> operands. Each result is named after the operation and operand |
| ; type it exercises (%fmul_<type>), and the check lines use the same names. |
| define void @fast_fmul_reductions() { |
| ; Z15-LABEL: 'fast_fmul_reductions' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef) |
| %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef) |
| %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef) |
| %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef) |
| %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.fmin over <4 x float>, |
| ; <8 x float>, <2 x double>, <4 x double> and <4 x fp128> operands. These |
| ; intrinsics take only the vector operand. |
| define void @fmin_reductions() { |
| ; Z15-LABEL: 'fmin_reductions' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) |
| %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) |
| %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) |
| %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) |
| %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.fmax over the same operand |
| ; types as @fmin_reductions. The expected costs are the same numbers that are |
| ; checked for the fmin reductions. |
| define void @fmax_reductions() { |
| ; Z15-LABEL: 'fmax_reductions' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) |
| %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) |
| %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) |
| %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) |
| %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.umin over i64, i32, i8 and |
| ; i128 element vectors (<2 x i64> up to <128 x i8> and <4 x i128>). Every |
| ; instruction in the body, including the return, has a matching check line. |
| define void @reduceumin() { |
| ; Z15-LABEL: 'reduceumin' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) |
| %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) |
| %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) |
| %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) |
| |
| %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) |
| %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef) |
| |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.umax over i64, i32, i8 and |
| ; i128 element vectors (<2 x i64> up to <128 x i8> and <4 x i128>). Every |
| ; instruction in the body, including the return, has a matching check line. |
| define void @reduceumax() { |
| ; Z15-LABEL: 'reduceumax' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) |
| %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) |
| %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) |
| %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) |
| |
| %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) |
| %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef) |
| |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.smin over i64, i32, i8 and |
| ; i128 element vectors (<2 x i64> up to <128 x i8> and <4 x i128>). Every |
| ; instruction in the body, including the return, has a matching check line. |
| define void @reducesmin() { |
| ; Z15-LABEL: 'reducesmin' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) |
| %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) |
| %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) |
| %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) |
| |
| %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) |
| %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef) |
| |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.smax over i64, i32, i8 and |
| ; i128 element vectors (<2 x i64> up to <128 x i8> and <4 x i128>). Every |
| ; instruction in the body, including the return, has a matching check line. |
| define void @reducesmax() { |
| ; Z15-LABEL: 'reducesmax' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) |
| %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) |
| %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) |
| %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) |
| |
| %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) |
| %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef) |
| |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.add over i64, i32, i16 and i8 |
| ; element vectors with 2, 4, 8 and 16 elements, followed by two outlier |
| ; shapes: <128 x i8> and <4 x i256>. Every instruction in the body, including |
| ; the return, has a matching check line. |
| define void @reduceadd() { |
| ; Z15-LABEL: 'reduceadd' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| ; REDUCEADD64 |
| %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) |
| %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) |
| %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) |
| %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) |
| ; REDUCEADD32 |
| %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) |
| %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) |
| %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) |
| %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) |
| ; REDUCEADD16 |
| %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) |
| %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) |
| %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) |
| %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) |
| ; REDUCEADD8 |
| %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) |
| %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) |
| %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) |
| %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) |
| ; EXTREME VALUES |
| %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) |
| %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef) |
| |
| ret void |
| } |
| |
| ; Queries the cost model for llvm.vector.reduce.mul over i64, i32, i16 and i8 |
| ; element vectors with 2, 4, 8 and 16 elements, followed by two outlier |
| ; shapes: <128 x i8> and <4 x i256>. The check lines use the prefix selected |
| ; by this file's FileCheck invocation, and every instruction in the body, |
| ; including the return, has a matching check line. |
| define void @reducemul() { |
| ; Z15-LABEL: 'reducemul' |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef) |
| ; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void |
| ; |
| ; REDUCEMUL64 |
| %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef) |
| %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef) |
| %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef) |
| %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef) |
| ; REDUCEMUL32 |
| %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef) |
| %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef) |
| %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef) |
| %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef) |
| ; REDUCEMUL16 |
| %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef) |
| %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef) |
| %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef) |
| %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef) |
| ; REDUCEMUL8 |
| %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef) |
| %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef) |
| %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef) |
| %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef) |
| ; EXTREME VALUES |
| %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef) |
| %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef) |
| |
| ret void |
| } |
| |
| ; Declarations of every reduction intrinsic called by the functions above, |
| ; grouped by operation. |
| |
| ; Floating-point add reductions: scalar start operand, then the vector. |
| declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) |
| declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) |
| declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) |
| declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) |
| declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>) |
| |
| ; Floating-point multiply reductions: scalar start operand, then the vector. |
| declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>) |
| declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) |
| declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>) |
| declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>) |
| declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>) |
| |
| ; Floating-point minimum reductions: vector operand only. |
| declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) |
| declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) |
| declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) |
| declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) |
| declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>) |
| |
| ; Floating-point maximum reductions: vector operand only. |
| declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) |
| declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) |
| declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) |
| declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) |
| declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>) |
| |
| ; Unsigned integer minimum reductions. |
| declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) |
| declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) |
| declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) |
| declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) |
| declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>) |
| declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>) |
| |
| ; Unsigned integer maximum reductions. |
| declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) |
| declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) |
| declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) |
| declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) |
| declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>) |
| declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>) |
| |
| ; Signed integer minimum reductions. |
| declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) |
| declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) |
| declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) |
| declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) |
| declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>) |
| declare i128 @llvm.vector.reduce.smin.v4i128(<4 x i128>) |
| |
| ; Signed integer maximum reductions. |
| declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) |
| declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) |
| declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) |
| declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) |
| declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) |
| declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>) |
| |
| ; Integer add reductions over i64, i32, i16 and i8 element vectors. |
| declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) |
| declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) |
| declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) |
| declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) |
| declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) |
| declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) |
| declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) |
| declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) |
| declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) |
| declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) |
| declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) |
| declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) |
| declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) |
| declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) |
| declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) |
| declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) |
| |
| ; Integer add reductions for the outlier shapes used by @reduceadd. |
| declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>) |
| declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>) |
| |
| ; Integer multiply reductions over i64, i32, i16 and i8 element vectors. |
| declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>) |
| declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>) |
| declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>) |
| declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>) |
| declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) |
| declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) |
| declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) |
| declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>) |
| declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>) |
| declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>) |
| declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>) |
| declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>) |
| declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>) |
| declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>) |
| declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>) |
| declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>) |
| |
| ; Integer multiply reductions for the outlier shapes used by @reducemul. |
| declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>) |
| declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>) |