llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ARGBASED
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -intrinsic-cost-strategy=type-based-intrinsic-cost | FileCheck %s --check-prefixes=CHECK,TYPEBASED

 define void @unsupported_fp_ops(<vscale x 4 x float> %vec, i32 %extraarg) {
 ; CHECK-LABEL: 'unsupported_fp_ops'
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 %extraarg)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
   %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 %extraarg)
   ret void
 }

 define void @powi(<vscale x 4 x float> %vec) {
 ; ARGBASED-LABEL: 'powi'
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'powi'
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
   ret void
 }

 define void @fshr(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c) {
 ; CHECK-LABEL: 'fshr'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <vscale x 1 x i32> @llvm.fshr.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
   ret void
 }

 define void @fshl(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c) {
 ; CHECK-LABEL: 'fshl'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <vscale x 1 x i32> @llvm.fshl.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
   ret void
 }

 define void @vp_fshr() {
 ; CHECK-LABEL: 'vp_fshr'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %28 = call <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %29 = call <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %30 = call <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %31 = call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %32 = call <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %33 = call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %34 = call <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %35 = call <vscale x 1 x i64> @llvm.vp.fshr.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = call <vscale x 2 x i64> @llvm.vp.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %37 = call <vscale x 4 x i64> @llvm.vp.fshr.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %38 = call <vscale x 8 x i64> @llvm.vp.fshr.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   call <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   call <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   call <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   call <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
   call <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
   call <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   call <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   call <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
   call <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   call <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   call <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   call <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   call <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i64> @llvm.vp.fshr.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i64> @llvm.vp.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i64> @llvm.vp.fshr.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i64> @llvm.vp.fshr.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }

 define void @vp_fshl() {
 ; CHECK-LABEL: 'vp_fshl'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i8> @llvm.vp.fshl.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.fshl.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.fshl.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.fshl.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.fshl.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.fshl.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.fshl.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.fshl.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.fshl.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.fshl.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.fshl.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.fshl.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 1 x i32> @llvm.vp.fshl.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call <vscale x 2 x i32> @llvm.vp.fshl.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %28 = call <vscale x 4 x i32> @llvm.vp.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %29 = call <vscale x 8 x i32> @llvm.vp.fshl.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %30 = call <vscale x 16 x i32> @llvm.vp.fshl.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %31 = call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %32 = call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %33 = call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %34 = call <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %35 = call <vscale x 1 x i64> @llvm.vp.fshl.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = call <vscale x 2 x i64> @llvm.vp.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %37 = call <vscale x 4 x i64> @llvm.vp.fshl.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %38 = call <vscale x 8 x i64> @llvm.vp.fshl.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   call <4 x i8> @llvm.vp.fshl.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   call <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   call <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i8> @llvm.vp.fshl.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i8> @llvm.vp.fshl.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i8> @llvm.vp.fshl.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i8> @llvm.vp.fshl.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 16 x i8> @llvm.vp.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   call <vscale x 32 x i8> @llvm.vp.fshl.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
   call <vscale x 64 x i8> @llvm.vp.fshl.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
   call <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   call <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i16> @llvm.vp.fshl.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i16> @llvm.vp.fshl.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i16> @llvm.vp.fshl.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i16> @llvm.vp.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 16 x i16> @llvm.vp.fshl.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   call <vscale x 32 x i16> @llvm.vp.fshl.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
   call <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   call <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   call <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i32> @llvm.vp.fshl.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i32> @llvm.vp.fshl.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i32> @llvm.vp.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i32> @llvm.vp.fshl.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 16 x i32> @llvm.vp.fshl.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   call <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   call <vscale x 1 x i64> @llvm.vp.fshl.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
   call <vscale x 2 x i64> @llvm.vp.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 4 x i64> @llvm.vp.fshl.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 8 x i64> @llvm.vp.fshl.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }

 define void @add() {
 ; CHECK-LABEL: 'add'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = add <2 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = add <4 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = add <8 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = add <16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = add <2 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t11 = add <4 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t13 = add <8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t15 = add <16 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t17 = add <2 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t19 = add <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t21 = add <8 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t23 = add <16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t25 = add <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t27 = add <4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t29 = add <8 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t31 = add <16 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t33 = add <vscale x 2 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t35 = add <vscale x 4 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t37 = add <vscale x 8 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t39 = add <vscale x 16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t41 = add <vscale x 2 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t43 = add <vscale x 4 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t45 = add <vscale x 8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t47 = add <vscale x 16 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t49 = add <vscale x 2 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t51 = add <vscale x 4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t53 = add <vscale x 8 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t55 = add <vscale x 16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t57 = add <vscale x 2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t59 = add <vscale x 4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t61 = add <vscale x 8 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.add.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t63 = add <vscale x 16 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   %t1 = add <2 x i8> undef, undef
   %t2 = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   %t3 = add <4 x i8> undef, undef
   %t4 = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   %t5 = add <8 x i8> undef, undef
   %t6 = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %t7 = add <16 x i8> undef, undef
   %t8 = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   %t9 = add <2 x i16> undef, undef
   %t10 = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   %t11 = add <4 x i16> undef, undef
   %t12 = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   %t13 = add <8 x i16> undef, undef
   %t14 = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   %t15 = add <16 x i16> undef, undef
   %t16 = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   %t17 = add <2 x i32> undef, undef
   %t18 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   %t19 = add <4 x i32> undef, undef
   %t20 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   %t21 = add <8 x i32> undef, undef
   %t22 = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   %t23 = add <16 x i32> undef, undef
   %t24 = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   %t25 = add <2 x i64> undef, undef
   %t26 = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   %t27 = add <4 x i64> undef, undef
   %t28 = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   %t29 = add <8 x i64> undef, undef
   %t30 = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   %t31 = add <16 x i64> undef, undef
   %t32 = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   %t33 = add <vscale x 2 x i8> undef, undef
   %t34 = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   %t35 = add <vscale x 4 x i8> undef, undef
   %t36 = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   %t37 = add <vscale x 8 x i8> undef, undef
   %t38 = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   %t39 = add <vscale x 16 x i8> undef, undef
   %t40 = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   %t41 = add <vscale x 2 x i16> undef, undef
   %t42 = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   %t43 = add <vscale x 4 x i16> undef, undef
   %t44 = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   %t45 = add <vscale x 8 x i16> undef, undef
   %t46 = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   %t47 = add <vscale x 16 x i16> undef, undef
   %t48 = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   %t49 = add <vscale x 2 x i32> undef, undef
   %t50 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   %t51 = add <vscale x 4 x i32> undef, undef
   %t52 = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   %t53 = add <vscale x 8 x i32> undef, undef
   %t54 = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   %t55 = add <vscale x 16 x i32> undef, undef
   %t56 = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   %t57 = add <vscale x 2 x i64> undef, undef
   %t58 = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   %t59 = add <vscale x 4 x i64> undef, undef
   %t60 = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   %t61 = add <vscale x 8 x i64> undef, undef
   %t62 = call <vscale x 16 x i64> @llvm.vp.add.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   %t63 = add <vscale x 16 x i64> undef, undef
   ret void
 }

 define void @and() {
 ; CHECK-LABEL: 'and'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = and <2 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = and <4 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = and <8 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = and <16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = and <2 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t11 = and <4 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t13 = and <8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t15 = and <16 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t17 = and <2 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t19 = and <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t21 = and <8 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t23 = and <16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t25 = and <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t27 = and <4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t29 = and <8 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t31 = and <16 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.and.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t33 = and <vscale x 2 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.and.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t35 = and <vscale x 4 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.and.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t37 = and <vscale x 8 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t39 = and <vscale x 16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.and.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t41 = and <vscale x 2 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.and.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t43 = and <vscale x 4 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.and.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t45 = and <vscale x 8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.and.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t47 = and <vscale x 16 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.and.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t49 = and <vscale x 2 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t51 = and <vscale x 4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.and.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t53 = and <vscale x 8 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.and.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t55 = and <vscale x 16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.and.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t57 = and <vscale x 2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.and.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t59 = and <vscale x 4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.and.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t61 = and <vscale x 8 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.and.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t63 = and <vscale x 16 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   %t1 = and <2 x i8> undef, undef
   %t2 = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   %t3 = and <4 x i8> undef, undef
   %t4 = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   %t5 = and <8 x i8> undef, undef
   %t6 = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %t7 = and <16 x i8> undef, undef
   %t8 = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   %t9 = and <2 x i16> undef, undef
   %t10 = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   %t11 = and <4 x i16> undef, undef
   %t12 = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   %t13 = and <8 x i16> undef, undef
   %t14 = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   %t15 = and <16 x i16> undef, undef
   %t16 = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   %t17 = and <2 x i32> undef, undef
   %t18 = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   %t19 = and <4 x i32> undef, undef
   %t20 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   %t21 = and <8 x i32> undef, undef
   %t22 = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   %t23 = and <16 x i32> undef, undef
   %t24 = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   %t25 = and <2 x i64> undef, undef
   %t26 = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   %t27 = and <4 x i64> undef, undef
   %t28 = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   %t29 = and <8 x i64> undef, undef %t30 = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   %t31 = and <16 x i64> undef, undef
   %t32 = call <vscale x 2 x i8> @llvm.vp.and.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   %t33 = and <vscale x 2 x i8> undef, undef
   %t34 = call <vscale x 4 x i8> @llvm.vp.and.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   %t35 = and <vscale x 4 x i8> undef, undef
   %t36 = call <vscale x 8 x i8> @llvm.vp.and.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   %t37 = and <vscale x 8 x i8> undef, undef
   %t38 = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   %t39 = and <vscale x 16 x i8> undef, undef
   %t40 = call <vscale x 2 x i16> @llvm.vp.and.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   %t41 = and <vscale x 2 x i16> undef, undef
   %t42 = call <vscale x 4 x i16> @llvm.vp.and.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   %t43 = and <vscale x 4 x i16> undef, undef
   %t44 = call <vscale x 8 x i16> @llvm.vp.and.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   %t45 = and <vscale x 8 x i16> undef, undef
   %t46 = call <vscale x 16 x i16> @llvm.vp.and.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   %t47 = and <vscale x 16 x i16> undef, undef
   %t48 = call <vscale x 2 x i32> @llvm.vp.and.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   %t49 = and <vscale x 2 x i32> undef, undef
   %t50 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   %t51 = and <vscale x 4 x i32> undef, undef
   %t52 = call <vscale x 8 x i32> @llvm.vp.and.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   %t53 = and <vscale x 8 x i32> undef, undef
   %t54 = call <vscale x 16 x i32> @llvm.vp.and.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   %t55 = and <vscale x 16 x i32> undef, undef
   %t56 = call <vscale x 2 x i64> @llvm.vp.and.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   %t57 = and <vscale x 2 x i64> undef, undef
   %t58 = call <vscale x 4 x i64> @llvm.vp.and.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   %t59 = and <vscale x 4 x i64> undef, undef
   %t60 = call <vscale x 8 x i64> @llvm.vp.and.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   %t61 = and <vscale x 8 x i64> undef, undef
   %t62 = call <vscale x 16 x i64> @llvm.vp.and.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   %t63 = and <vscale x 16 x i64> undef, undef
   ret void
 }


 define void @smax() {
 ; CHECK-LABEL: 'smax'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %t8 = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   %t16 = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   %t18 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   %t20 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   %t22 = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   %t24 = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   %t26 = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   %t28 = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   %t30 = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   %t32 = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   %t34 = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   %t36 = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   %t38 = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   %t40 = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   %t42 = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   %t44 = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   %t46 = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   %t48 = call <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   %t50 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   %t52 = call <vscale x 8 x i32> @llvm.vp.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   %t54 = call <vscale x 16 x i32> @llvm.vp.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   %t56 = call <vscale x 2 x i64> @llvm.vp.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   %t58 = call <vscale x 4 x i64> @llvm.vp.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   %t60 = call <vscale x 8 x i64> @llvm.vp.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   %t62 = call <vscale x 16 x i64> @llvm.vp.smax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }


 define void @smin() {
 ; CHECK-LABEL: 'smin'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %t8 = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   %t16 = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   %t18 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   %t20 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   %t22 = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   %t24 = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   %t26 = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   %t28 = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   %t30 = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   %t32 = call <vscale x 2 x i8> @llvm.vp.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   %t34 = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   %t36 = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   %t38 = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   %t40 = call <vscale x 2 x i16> @llvm.vp.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   %t42 = call <vscale x 4 x i16> @llvm.vp.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   %t44 = call <vscale x 8 x i16> @llvm.vp.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   %t46 = call <vscale x 16 x i16> @llvm.vp.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   %t48 = call <vscale x 2 x i32> @llvm.vp.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   %t50 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   %t52 = call <vscale x 8 x i32> @llvm.vp.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   %t54 = call <vscale x 16 x i32> @llvm.vp.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   %t56 = call <vscale x 2 x i64> @llvm.vp.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   %t58 = call <vscale x 4 x i64> @llvm.vp.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   %t60 = call <vscale x 8 x i64> @llvm.vp.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   %t62 = call <vscale x 16 x i64> @llvm.vp.smin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }

 define void @umax() {
 ; CHECK-LABEL: 'umax'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %t8 = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   %t16 = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   %t18 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   %t20 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   %t22 = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   %t24 = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   %t26 = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   %t28 = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   %t30 = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   %t32 = call <vscale x 2 x i8> @llvm.vp.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   %t34 = call <vscale x 4 x i8> @llvm.vp.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   %t36 = call <vscale x 8 x i8> @llvm.vp.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   %t38 = call <vscale x 16 x i8> @llvm.vp.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   %t40 = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   %t42 = call <vscale x 4 x i16> @llvm.vp.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   %t44 = call <vscale x 8 x i16> @llvm.vp.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   %t46 = call <vscale x 16 x i16> @llvm.vp.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   %t48 = call <vscale x 2 x i32> @llvm.vp.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   %t50 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   %t52 = call <vscale x 8 x i32> @llvm.vp.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   %t54 = call <vscale x 16 x i32> @llvm.vp.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   %t56 = call <vscale x 2 x i64> @llvm.vp.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   %t58 = call <vscale x 4 x i64> @llvm.vp.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   %t60 = call <vscale x 8 x i64> @llvm.vp.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   %t62 = call <vscale x 16 x i64> @llvm.vp.umax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }

 define void @umin() {
 ; CHECK-LABEL: 'umin'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %t8 = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
   %t16 = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
   %t18 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   %t20 = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
   %t22 = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
   %t24 = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   %t26 = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   %t28 = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   %t30 = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   %t32 = call <vscale x 2 x i8> @llvm.vp.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
   %t34 = call <vscale x 4 x i8> @llvm.vp.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   %t36 = call <vscale x 8 x i8> @llvm.vp.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   %t38 = call <vscale x 16 x i8> @llvm.vp.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   %t40 = call <vscale x 2 x i16> @llvm.vp.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
   %t42 = call <vscale x 4 x i16> @llvm.vp.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
   %t44 = call <vscale x 8 x i16> @llvm.vp.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
   %t46 = call <vscale x 16 x i16> @llvm.vp.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   %t48 = call <vscale x 2 x i32> @llvm.vp.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
   %t50 = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
   %t52 = call <vscale x 8 x i32> @llvm.vp.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
   %t54 = call <vscale x 16 x i32> @llvm.vp.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   %t56 = call <vscale x 2 x i64> @llvm.vp.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   %t58 = call <vscale x 4 x i64> @llvm.vp.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   %t60 = call <vscale x 8 x i64> @llvm.vp.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   %t62 = call <vscale x 16 x i64> @llvm.vp.umin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }

 define void @abs() {
 ; CHECK-LABEL: 'abs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %27 = call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %28 = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %29 = call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %30 = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %31 = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %32 = call <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 0, <2 x i1> undef, i32 undef)
   call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 0)
   call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 0, <4 x i1> undef, i32 undef)
   call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 0)
   call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 0, <8 x i1> undef, i32 undef)
   call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 0)
   call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 0, <16 x i1> undef, i32 undef)
   call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 0)
   call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 0, <2 x i1> undef, i32 undef)
   call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 0)
   call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 0, <4 x i1> undef, i32 undef)
   call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 0)
   call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 0, <8 x i1> undef, i32 undef)
   call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 0)
   call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 0, <16 x i1> undef, i32 undef)
   call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 0)
   call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 0, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8> undef, i1 0)
   call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 0, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8> undef, i1 0)
   call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 0, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8> undef, i1 0)
   call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 0, <vscale x 16 x i1> undef, i32 undef)
   call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> undef, i1 0)
   call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 0, <vscale x 2 x i1> undef, i32 undef)
   call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> undef, i1 0)
   call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 0, <vscale x 4 x i1> undef, i32 undef)
   call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> undef, i1 0)
   call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 0, <vscale x 8 x i1> undef, i32 undef)
   call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> undef, i1 0)
   call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 0, <vscale x 16 x i1> undef, i32 undef)
   call <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64> undef, i1 0)
   ret void
 }

 define void @load(ptr %src) {
 ; CHECK-LABEL: 'load'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
   %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
   %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
   %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
   %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
   %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
   %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
   %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
   %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
   %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
   %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
   %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
   %t12 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
   %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
   %t14 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
   %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
   %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
   %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
   %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
   %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
   %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
   %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
   %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
   %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
   %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
   %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
   %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
   %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
   %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
   %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
   %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
   %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }

 define void @store() {
 ; CHECK-LABEL: 'store'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 2
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v4i8.p0(<4 x i8> undef, ptr undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v8i8.p0(<8 x i8> undef, ptr undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 8
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v16i8.p0(<16 x i8> undef, ptr undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> undef, ptr undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.v4i64.p0(<4 x i64> undef, ptr undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v8i64.p0(<8 x i64> undef, ptr undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 64
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v16i64.p0(<16 x i64> undef, ptr undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> undef, ptr undef, align 128
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 2 x i8> undef, ptr undef, align 2
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 4 x i8> undef, ptr undef, align 4
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 8 x i8> undef, ptr undef, align 8
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <vscale x 16 x i8> undef, ptr undef, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <vscale x 2 x i64> undef, ptr undef, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <vscale x 4 x i64> undef, ptr undef, align 32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <vscale x 8 x i64> undef, ptr undef, align 64
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.nxv16i64.p0(<vscale x 16 x i64> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <vscale x 16 x i64> undef, ptr undef, align 128
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.vp.store.v2i8(<2 x i8> undef, ptr undef, <2 x i1> undef, i32 undef)
   store <2 x i8> undef, ptr undef
   call void @llvm.vp.store.v4i8(<4 x i8> undef, ptr undef, <4 x i1> undef, i32 undef)
   store <4 x i8> undef, ptr undef
   call void @llvm.vp.store.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef, i32 undef)
   store <8 x i8> undef, ptr undef
   call void @llvm.vp.store.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef, i32 undef)
   store <16 x i8> undef, ptr undef
   call void @llvm.vp.store.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef, i32 undef)
   store <2 x i64> undef, ptr undef
   call void @llvm.vp.store.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef, i32 undef)
   store <4 x i64> undef, ptr undef
   call void @llvm.vp.store.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef, i32 undef)
   store <8 x i64> undef, ptr undef
   call void @llvm.vp.store.v16i64(<16 x i64> undef, ptr undef, <16 x i1> undef, i32 undef)
   store <16 x i64> undef, ptr undef
   call void @llvm.vp.store.nxv2i8(<vscale x 2 x i8> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
   store <vscale x 2 x i8> undef, ptr undef
   call void @llvm.vp.store.nxv4i8(<vscale x 4 x i8> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
   store <vscale x 4 x i8> undef, ptr undef
   call void @llvm.vp.store.nxv8i8(<vscale x 8 x i8> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
   store <vscale x 8 x i8> undef, ptr undef
   call void @llvm.vp.store.nxv16i8(<vscale x 16 x i8> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
   store <vscale x 16 x i8> undef, ptr undef
   call void @llvm.vp.store.nxv2i64(<vscale x 2 x i64> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
   store <vscale x 2 x i64> undef, ptr undef
   call void @llvm.vp.store.nxv4i64(<vscale x 4 x i64> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
   store <vscale x 4 x i64> undef, ptr undef
   call void @llvm.vp.store.nxv8i64(<vscale x 8 x i64> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
   store <vscale x 8 x i64> undef, ptr undef
   call void @llvm.vp.store.nxv16i64(<vscale x 16 x i64> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
   store <vscale x 16 x i64> undef, ptr undef
   ret void
 }

 define void @gather() {
 ; ARGBASED-LABEL: 'gather'
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %5 = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %6 = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %7 = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %8 = call <16 x i64> @llvm.vp.gather.v16i64.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %12 = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %13 = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %14 = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %15 = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 16 x i64> @llvm.vp.gather.nxv16i64.nxv16p0(<vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'gather'
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %1 = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %2 = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %3 = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %4 = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %5 = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %6 = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %7 = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %8 = call <16 x i64> @llvm.vp.gather.v16i64.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %11 = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %12 = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %13 = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %14 = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %15 = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 16 x i64> @llvm.vp.gather.nxv16i64.nxv16p0(<vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x i8> @llvm.vp.gather(<2 x ptr> poison, <2 x i1> poison, i32 poison)
   call <4 x i8> @llvm.vp.gather(<4 x ptr> poison, <4 x i1> poison, i32 poison)
   call <8 x i8> @llvm.vp.gather(<8 x ptr> poison, <8 x i1> poison, i32 poison)
   call <16 x i8> @llvm.vp.gather(<16 x ptr> poison, <16 x i1> poison, i32 poison)
   call <2 x i64> @llvm.vp.gather(<2 x ptr> poison, <2 x i1> poison, i32 poison)
   call <4 x i64> @llvm.vp.gather(<4 x ptr> poison, <4 x i1> poison, i32 poison)
   call <8 x i64> @llvm.vp.gather(<8 x ptr> poison, <8 x i1> poison, i32 poison)
   call <16 x i64> @llvm.vp.gather(<16 x ptr> poison, <16 x i1> poison, i32 poison)
   call <vscale x 2 x i8> @llvm.vp.gather(<vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i8> @llvm.vp.gather(<vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i8> @llvm.vp.gather(<vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i8> @llvm.vp.gather(<vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i64> @llvm.vp.gather(<vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i64> @llvm.vp.gather(<vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i64> @llvm.vp.gather(<vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i64> @llvm.vp.gather(<vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
   ret void
 }

 define void @scatter() {
 ; ARGBASED-LABEL: 'scatter'
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: call void @llvm.vp.scatter.v16i64.v16p0(<16 x i64> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> poison, <vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> poison, <vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> poison, <vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> poison, <vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> poison, <vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> poison, <vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> poison, <vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv16i64.nxv16p0(<vscale x 16 x i64> poison, <vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'scatter'
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: call void @llvm.vp.scatter.v16i64.v16p0(<16 x i64> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> poison, <vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> poison, <vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> poison, <vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> poison, <vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> poison, <vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> poison, <vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> poison, <vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv16i64.nxv16p0(<vscale x 16 x i64> poison, <vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.vp.scatter(<2 x i8> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<4 x i8> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<8 x i8> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<16 x i8> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<2 x i64> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<4 x i64> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<8 x i64> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<16 x i64> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 2 x i8> poison, <vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 4 x i8> poison, <vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 8 x i8> poison, <vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 16 x i8> poison, <vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 2 x i64> poison, <vscale x 2 x ptr> poison, <vscale x 2 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 4 x i64> poison, <vscale x 4 x ptr> poison, <vscale x 4 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 8 x i64> poison, <vscale x 8 x ptr> poison, <vscale x 8 x i1> poison, i32 poison)
   call void @llvm.vp.scatter(<vscale x 16 x i64> poison, <vscale x 16 x ptr> poison, <vscale x 16 x i1> poison, i32 poison)
   ret void
 }

 define void @strided_load() {
 ; ARGBASED-LABEL: 'strided_load'
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ti1_2 = call <2 x i1> @llvm.experimental.vp.strided.load.v2i1.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %ti1_4 = call <4 x i1> @llvm.experimental.vp.strided.load.v4i1.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %ti1_8 = call <8 x i1> @llvm.experimental.vp.strided.load.v8i1.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %ti1_16 = call <16 x i1> @llvm.experimental.vp.strided.load.v16i1.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t4 = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t6 = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8.a = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t17 = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t19 = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t21 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t23 = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'strided_load'
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ti1_2 = call <2 x i1> @llvm.experimental.vp.strided.load.v2i1.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ti1_4 = call <4 x i1> @llvm.experimental.vp.strided.load.v4i1.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %ti1_8 = call <8 x i1> @llvm.experimental.vp.strided.load.v8i1.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %ti1_16 = call <16 x i1> @llvm.experimental.vp.strided.load.v16i1.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t4 = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t6 = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8.a = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t17 = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t19 = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t21 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t23 = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %ti1_2 = call <2 x i1> @llvm.experimental.vp.strided.load.v2i1.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
   %ti1_4 = call <4 x i1> @llvm.experimental.vp.strided.load.v4i1.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
   %ti1_8 = call <8 x i1> @llvm.experimental.vp.strided.load.v8i1.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
   %ti1_16 = call <16 x i1> @llvm.experimental.vp.strided.load.v16i1.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
   %t0 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
   %t8.a = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr align(8)  undef, i64 undef, <2 x i1> undef, i32 undef)
   %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.i64(ptr align(8)  undef, i64 undef, <4 x i1> undef, i32 undef)
   %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.i64(ptr align(8)  undef, i64 undef, <8 x i1> undef, i32 undef)
   %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.i64(ptr align(8) undef, i64 undef, <16 x i1> undef, i32 undef)
   %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
   %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
   %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
   %t17 = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
   %t19 = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
   %t21 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
   %t23 = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
   %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
   %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
   %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
   %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }

 define void @strided_store() {
 ; CHECK-LABEL: 'strided_store'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i8.p0.i64(<4 x i8> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i8.p0.i64(<8 x i8> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i8.p0.i64(<16 x i8> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.nxv2i8.p0.i64(<vscale x 2 x i8> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.nxv4i8.p0.i64(<vscale x 4 x i8> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.nxv8i8.p0.i64(<vscale x 8 x i8> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vp.strided.store.nxv16i8.p0.i64(<vscale x 16 x i8> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.nxv2i64.p0.i64(<vscale x 2 x i64> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.nxv4i64.p0.i64(<vscale x 4 x i64> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.nxv8i64.p0.i64(<vscale x 8 x i64> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vp.strided.store.nxv16i64.p0.i64(<vscale x 16 x i64> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.experimental.vp.strided.store.v2i8.i64(<2 x i8> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v4i8.i64(<4 x i8> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v8i8.i64(<8 x i8> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v4i64.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v8i64.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v16i64.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> undef, ptr align(8) undef, i64 undef, <2 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v4i64.i64(<4 x i64> undef, ptr align(8) undef, i64 undef, <4 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v8i64.i64(<8 x i64> undef, ptr align(8) undef, i64 undef, <8 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.v16i64.i64(<16 x i64> undef, ptr align(8) undef, i64 undef, <16 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
   call void @llvm.experimental.vp.strided.store.nxv16i64.i64(<vscale x 16 x i64> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }


 define void @reduce_add() {
 ; CHECK-LABEL: 'reduce_add'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %22 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
   call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
   call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
   call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
   call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
   call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
   call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
   call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
   call i8 @llvm.vp.reduce.add.nxv8i8(i8  undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
   call i8 @llvm.vp.reduce.add.nxv4i8(i8  undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
   call i8 @llvm.vp.reduce.add.nxv8i8(i8  undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
   call i8 @llvm.vp.reduce.add.nxv16i8(i8  undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
   call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
   call i64 @llvm.vp.reduce.add.nxv2i64(i64  undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
   call i64 @llvm.vp.reduce.add.nxv4i64(i64  undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
   call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
   call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
   ret void
 }

 define void @reduce_fadd() {
 ; CHECK-LABEL: 'reduce_fadd'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %6 = call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %8 = call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %14 = call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %16 = call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %20 = call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %22 = call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef)
   call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef)
   call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef)
   call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef)
   call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef)
   call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef)
   call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef)
   call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef)
   call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef)
   call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef)
   call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef)
   call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef)
   call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef)
   call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
   call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
   call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
   call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
   ret void
 }

 define void @reduce_other() {
 ; CHECK-LABEL: 'reduce_other'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %4 = call i32 @llvm.vp.reduce.mul.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %9 = call i32 @llvm.vp.reduce.mul.nxv4i32(i32 undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %10 = call float @llvm.vp.reduce.fmul.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call float @llvm.vp.reduce.fminimum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %13 = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call float @llvm.vp.reduce.fmaximum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call i32 @llvm.vp.reduce.mul.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
   call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)

   call i32 @llvm.vp.reduce.mul.v4i32(i32 undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)

   call float @llvm.vp.reduce.fmul.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   call float @llvm.vp.reduce.fminimum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   call float @llvm.vp.reduce.fmaximum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   ret void
 }


 define void @vp_fadd(){
 ; CHECK-LABEL: 'vp_fadd'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fadd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t32 = fadd <vscale x 16 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)

   %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)

   %t17 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
   %t19 = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   %t21 = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
   %t23 = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)

   %t25 = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
   %t27 = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
   %t29 = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   %t31 = call <vscale x 16 x double> @llvm.vp.fadd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
   %t32 = fadd <vscale x 16 x double> undef, undef

   ret void
 }

 define void @vp_fsub(){
 ; CHECK-LABEL: 'vp_fsub'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fsub.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)

   %t8 = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)

   %t17 = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
   %t19 = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   %t21 = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
   %t23 = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)

   %t25 = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
   %t27 = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
   %t29 = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   %t31 = call <vscale x 16 x double> @llvm.vp.fsub.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)

   ret void
 }

 define void @vp_fmul(){
 ; CHECK-LABEL: 'vp_fmul'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fmul.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)

   %t8 = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)

   %t17 = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
   %t19 = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   %t21 = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
   %t23 = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)

   %t25 = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
   %t27 = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
   %t29 = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   %t31 = call <vscale x 16 x double> @llvm.vp.fmul.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)

   ret void
 }

 define void @vp_fdiv(){
 ; CHECK-LABEL: 'vp_fdiv'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fdiv.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t0 = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
   %t2 = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
   %t4 = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
   %t6 = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)

   %t8 = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
   %t10 = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
   %t12 = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
   %t14 = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)

   %t17 = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
   %t19 = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
   %t21 = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
   %t23 = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)

   %t25 = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
   %t27 = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
   %t29 = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   %t31 = call <vscale x 16 x double> @llvm.vp.fdiv.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)

   ret void
 }

 define void @splice() {
 ; CHECK-LABEL: 'splice'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vp.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vp.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1, <vscale x 32 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vp.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vp.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vp.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vp.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vp.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.experimental.vp.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vp.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vp.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vp.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vp.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1, <vscale x 32 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vp.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vp.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vp.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vp.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vp.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv8i1 =  call <vscale x 8 x i1> @llvm.experimental.vp.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vp.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
   %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vp.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
   ret void
 }

 define void @is.fpclass() {
 ; ARGBASED-LABEL: 'is.fpclass'
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call <2 x i1> @llvm.vp.is.fpclass.v2bf16(<2 x bfloat> poison, i32 0, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %2 = call <4 x i1> @llvm.vp.is.fpclass.v4bf16(<4 x bfloat> poison, i32 0, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %3 = call <8 x i1> @llvm.vp.is.fpclass.v8bf16(<8 x bfloat> poison, i32 0, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %4 = call <16 x i1> @llvm.vp.is.fpclass.v16bf16(<16 x bfloat> poison, i32 0, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call <2 x i1> @llvm.vp.is.fpclass.v2f16(<2 x half> poison, i32 0, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %6 = call <4 x i1> @llvm.vp.is.fpclass.v4f16(<4 x half> poison, i32 0, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %7 = call <8 x i1> @llvm.vp.is.fpclass.v8f16(<8 x half> poison, i32 0, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %8 = call <16 x i1> @llvm.vp.is.fpclass.v16f16(<16 x half> poison, i32 0, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = call <2 x i1> @llvm.vp.is.fpclass.v2f32(<2 x float> poison, i32 0, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %10 = call <4 x i1> @llvm.vp.is.fpclass.v4f32(<4 x float> poison, i32 0, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %11 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> poison, i32 0, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %12 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> poison, i32 0, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = call <2 x i1> @llvm.vp.is.fpclass.v2f64(<2 x double> poison, i32 0, <2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %14 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> poison, i32 0, <4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %15 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> poison, i32 0, <8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %16 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> poison, i32 0, <16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2bf16(<vscale x 2 x bfloat> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4bf16(<vscale x 4 x bfloat> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8bf16(<vscale x 8 x bfloat> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %20 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16bf16(<vscale x 16 x bfloat> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %21 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f16(<vscale x 2 x half> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %22 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f16(<vscale x 4 x half> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %23 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f16(<vscale x 8 x half> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %24 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f16(<vscale x 16 x half> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %25 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f32(<vscale x 2 x float> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %26 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f32(<vscale x 4 x float> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %27 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f32(<vscale x 8 x float> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %28 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f32(<vscale x 16 x float> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %29 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f64(<vscale x 2 x double> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %30 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f64(<vscale x 4 x double> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %31 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f64(<vscale x 8 x double> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Invalid cost for instruction: %32 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f64(<vscale x 16 x double> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'is.fpclass'
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %1 = call <2 x i1> @llvm.vp.is.fpclass.v2bf16(<2 x bfloat> poison, i32 0, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %2 = call <4 x i1> @llvm.vp.is.fpclass.v4bf16(<4 x bfloat> poison, i32 0, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %3 = call <8 x i1> @llvm.vp.is.fpclass.v8bf16(<8 x bfloat> poison, i32 0, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %4 = call <16 x i1> @llvm.vp.is.fpclass.v16bf16(<16 x bfloat> poison, i32 0, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = call <2 x i1> @llvm.vp.is.fpclass.v2f16(<2 x half> poison, i32 0, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call <4 x i1> @llvm.vp.is.fpclass.v4f16(<4 x half> poison, i32 0, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %7 = call <8 x i1> @llvm.vp.is.fpclass.v8f16(<8 x half> poison, i32 0, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %8 = call <16 x i1> @llvm.vp.is.fpclass.v16f16(<16 x half> poison, i32 0, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %9 = call <2 x i1> @llvm.vp.is.fpclass.v2f32(<2 x float> poison, i32 0, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %10 = call <4 x i1> @llvm.vp.is.fpclass.v4f32(<4 x float> poison, i32 0, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %11 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> poison, i32 0, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %12 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> poison, i32 0, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <2 x i1> @llvm.vp.is.fpclass.v2f64(<2 x double> poison, i32 0, <2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %14 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> poison, i32 0, <4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %15 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> poison, i32 0, <8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %16 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> poison, i32 0, <16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2bf16(<vscale x 2 x bfloat> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4bf16(<vscale x 4 x bfloat> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8bf16(<vscale x 8 x bfloat> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %20 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16bf16(<vscale x 16 x bfloat> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %21 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f16(<vscale x 2 x half> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %22 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f16(<vscale x 4 x half> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %23 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f16(<vscale x 8 x half> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %24 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f16(<vscale x 16 x half> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %25 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f32(<vscale x 2 x float> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %26 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f32(<vscale x 4 x float> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %27 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f32(<vscale x 8 x float> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %28 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f32(<vscale x 16 x float> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %29 = call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f64(<vscale x 2 x double> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %30 = call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f64(<vscale x 4 x double> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %31 = call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f64(<vscale x 8 x double> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %32 = call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f64(<vscale x 16 x double> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x i1> @llvm.vp.is.fpclass.v2bf16(<2 x bfloat> poison, i32 0, <2 x i1> poison, i32 poison)
   call <4 x i1> @llvm.vp.is.fpclass.v4bf16(<4 x bfloat> poison, i32 0, <4 x i1> poison, i32 poison)
   call <8 x i1> @llvm.vp.is.fpclass.v8bf16(<8 x bfloat> poison, i32 0, <8 x i1> poison, i32 poison)
   call <16 x i1> @llvm.vp.is.fpclass.v16bf16(<16 x bfloat> poison, i32 0, <16 x i1> poison, i32 poison)
   call <2 x i1> @llvm.vp.is.fpclass.v2f16(<2 x half> poison, i32 0, <2 x i1> poison, i32 poison)
   call <4 x i1> @llvm.vp.is.fpclass.v4f16(<4 x half> poison, i32 0, <4 x i1> poison, i32 poison)
   call <8 x i1> @llvm.vp.is.fpclass.v8f16(<8 x half> poison, i32 0, <8 x i1> poison, i32 poison)
   call <16 x i1> @llvm.vp.is.fpclass.v16f16(<16 x half> poison, i32 0, <16 x i1> poison, i32 poison)
   call <2 x i1> @llvm.vp.is.fpclass.v2f32(<2 x float> poison, i32 0, <2 x i1> poison, i32 poison)
   call <4 x i1> @llvm.vp.is.fpclass.v4f32(<4 x float> poison, i32 0, <4 x i1> poison, i32 poison)
   call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> poison, i32 0, <8 x i1> poison, i32 poison)
   call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> poison, i32 0, <16 x i1> poison, i32 poison)
   call <2 x i1> @llvm.vp.is.fpclass.v2f64(<2 x double> poison, i32 0, <2 x i1> poison, i32 poison)
   call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> poison, i32 0, <4 x i1> poison, i32 poison)
   call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> poison, i32 0, <8 x i1> poison, i32 poison)
   call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> poison, i32 0, <16 x i1> poison, i32 poison)
   call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2bf16(<vscale x 2 x bfloat> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4bf16(<vscale x 4 x bfloat> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8bf16(<vscale x 8 x bfloat> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16bf16(<vscale x 16 x bfloat> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f16(<vscale x 2 x half> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f16(<vscale x 4 x half> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f16(<vscale x 8 x half> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f16(<vscale x 16 x half> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f32(<vscale x 2 x float> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f32(<vscale x 4 x float> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f32(<vscale x 8 x float> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f32(<vscale x 16 x float> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i1> @llvm.vp.is.fpclass.nxv2f64(<vscale x 2 x double> poison, i32 0, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i1> @llvm.vp.is.fpclass.nxv4f64(<vscale x 4 x double> poison, i32 0, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i1> @llvm.vp.is.fpclass.nxv8f64(<vscale x 8 x double> poison, i32 0, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i1> @llvm.vp.is.fpclass.nxv16f64(<vscale x 16 x double> poison, i32 0, <vscale x 16 x i1> poison, i32 poison)

   ret void
 }

 define void @reverse() {
 ; CHECK-LABEL: 'reverse'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %27 = call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %31 = call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %32 = call <16 x float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %34 = call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %35 = call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %36 = call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison)
   call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison)
   call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison)
   call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison)
   call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison)
   call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison)
   call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison)
   call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison)
   call <2 x i16> @llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison)
   call <4 x i16> @llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison)
   call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison)
   call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison)
   call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
   call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
   call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
   call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison)
   call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison)
   call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison)
   call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison)
   call <16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison)
   call <2 x bfloat> @llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
   call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
   call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
   call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
   call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
   call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
   call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
   call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
   call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison)
   call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison)
   call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison)
   call <16 x float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison)
   call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison)
   call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison)
   call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison)
   call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison)
   call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i8> @llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i8> @llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x half> @llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
   call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
   call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
   call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
   call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)

   ret void
 }