; blob: 0def20215e988992a9aa49cc1f47b4aa0e9a3a93 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes='print<cost-model>' -disable-output -mtriple=s390x-unknown-linux \
; RUN: -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15
; Cost-model coverage for llvm.vector.reduce.fadd calls that carry no
; fast-math flags. Element types exercised: float (4 and 8 lanes), double
; (2 and 4 lanes) and fp128 (4 lanes). Every vector operand is undef; the
; start value is 0.0 for float/double and undef for fp128.
define void @fadd_reductions() {
; Z15-LABEL: 'fadd_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
%fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
%fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
%fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
%fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
ret void
}
; Same llvm.vector.reduce.fadd shapes as @fadd_reductions, but every call
; carries the `fast` flag; the expected costs below are lower than in the
; unflagged variant.
; NOTE(review): the %src and %dst parameters are never referenced in the
; body; the other functions in this file take no parameters.
define void @fast_fadd_reductions(ptr %src, ptr %dst) {
; Z15-LABEL: 'fast_fadd_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
%fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
%fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
%fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
%fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.fmul calls that carry no
; fast-math flags. Element types exercised: float (4 and 8 lanes), double
; (2 and 4 lanes) and fp128 (4 lanes). Every vector operand is undef; the
; start value is 0.0 for float/double and undef for fp128.
define void @fmul_reductions() {
; Z15-LABEL: 'fmul_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
%fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
%fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
%fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
%fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
ret void
}
; Same llvm.vector.reduce.fmul shapes as @fmul_reductions, but every call
; carries the `fast` flag; the expected costs below are lower than in the
; unflagged variant.
; The fp128 result is named %fmul_v4f128 so that every value in this function
; follows the %fmul_<shape> naming used by the other four calls (it was
; previously misnamed %fadd_v4f128). The assertion text is updated in
; lockstep because the cost-model printer echoes the value name.
define void @fast_fmul_reductions() {
; Z15-LABEL: 'fast_fmul_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
%fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
%fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
%fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
%fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.fmin. The intrinsic takes only
; the vector operand (no start value); all operands are undef. Element types
; exercised: float (4 and 8 lanes), double (2 and 4 lanes), fp128 (4 lanes).
define void @fmin_reductions() {
; Z15-LABEL: 'fmin_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
%V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
%V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
%V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
%V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.fmax. The intrinsic takes only
; the vector operand (no start value); all operands are undef. Element types
; exercised: float (4 and 8 lanes), double (2 and 4 lanes), fp128 (4 lanes).
; The expected costs are the same as for the matching fmin shapes.
define void @fmax_reductions() {
; Z15-LABEL: 'fmax_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
%V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
%V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
%V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
%V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.umin on integer vectors:
; i64 (2 and 4 lanes), i32 (4 and 8 lanes), i8 (128 lanes) and i128 (4 lanes).
; All vector operands are undef.
; Every assertion carries the colon that terminates the directive name;
; without it FileCheck treats the line as an ordinary comment and the cost is
; silently left unchecked. The final `ret void` is asserted as well, matching
; the floating-point reduction functions in this file.
define void @reduceumin() {
; Z15-LABEL: 'reduceumin'
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
%V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
%V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
%V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
%V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
%V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.umax on integer vectors:
; i64 (2 and 4 lanes), i32 (4 and 8 lanes), i8 (128 lanes) and i128 (4 lanes).
; All vector operands are undef.
; Every assertion carries the colon that terminates the directive name;
; without it FileCheck treats the line as an ordinary comment and the cost is
; silently left unchecked. The final `ret void` is asserted as well, matching
; the floating-point reduction functions in this file.
define void @reduceumax() {
; Z15-LABEL: 'reduceumax'
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
%V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
%V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
%V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
%V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
%V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.smin on integer vectors:
; i64 (2 and 4 lanes), i32 (4 and 8 lanes), i8 (128 lanes) and i128 (4 lanes).
; All vector operands are undef.
; Every assertion carries the colon that terminates the directive name;
; without it FileCheck treats the line as an ordinary comment and the cost is
; silently left unchecked. The final `ret void` is asserted as well, matching
; the floating-point reduction functions in this file.
define void @reducesmin() {
; Z15-LABEL: 'reducesmin'
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
%V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
%V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
%V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
%V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
%V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.smax on integer vectors:
; i64 (2 and 4 lanes), i32 (4 and 8 lanes), i8 (128 lanes) and i128 (4 lanes).
; All vector operands are undef.
; Every assertion carries the colon that terminates the directive name;
; without it FileCheck treats the line as an ordinary comment and the cost is
; silently left unchecked. The final `ret void` is asserted as well, matching
; the floating-point reduction functions in this file.
define void @reducesmax() {
; Z15-LABEL: 'reducesmax'
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
%V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
%V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
%V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
%V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
%V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.add on integer vectors. The body
; is grouped by element width (i64, i32, i16, i8; 2 to 16 lanes each),
; followed by two outliers: a 128-lane i8 vector and a 4-lane i256 vector.
; All vector operands are undef.
; The assertion run is contiguous and ends with the `ret void` cost, matching
; the floating-point reduction functions in this file, so an unexpected extra
; output line after the last reduction would be caught.
define void @reduceadd() {
; Z15-LABEL: 'reduceadd'
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; REDUCEADD64
%V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
%V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
%V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
%V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; REDUCEADD32
%V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
%V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
%V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
%V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; REDUCEADD16
%V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
%V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
%V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
%V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; REDUCEADD8
%V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
%V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
%V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
%V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; EXTREME VALUES
%V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
%V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
ret void
}
; Cost-model coverage for llvm.vector.reduce.mul on integer vectors. The body
; is grouped by element width (i64, i32, i16, i8; 2 to 16 lanes each),
; followed by two outliers: a 128-lane i8 vector and a 4-lane i256 vector.
; All vector operands are undef.
; The assertions use the one check prefix that the RUN line enables; under
; the default prefix they were never evaluated. They are also chained with
; the -NEXT form and end with the `ret void` cost, like the other functions.
; NOTE(review): these expectations were previously dead; re-run
; utils/update_analyze_test_checks.py to confirm the values before landing.
define void @reducemul() {
; Z15-LABEL: 'reducemul'
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; REDUCEMUL64
%V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
%V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
%V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
%V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
; REDUCEMUL32
%V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
%V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
%V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
%V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
; REDUCEMUL16
%V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
%V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
%V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
%V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
; REDUCEMUL8
%V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
%V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
%V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
%V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; EXTREME VALUES
%V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
%V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
ret void
}
; Declarations of every reduction intrinsic called by the test functions
; above, grouped by operation.

; Floating-point add: (start value, vector) -> scalar.
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
; Floating-point multiply: (start value, vector) -> scalar.
declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)
; Floating-point minimum: (vector) -> scalar.
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)
; Floating-point maximum: (vector) -> scalar.
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)
; Unsigned integer minimum.
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>)
; Unsigned integer maximum.
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>)
; Signed integer minimum.
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smin.v4i128(<4 x i128>)
; Signed integer maximum.
declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>)
; Integer add.
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>)
; Integer multiply.
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>)