// include/clang/Basic/riscv_vector.td - llvm-project/clang - Git at Google

 //==--- riscv_vector.td - RISC-V V-ext Builtin function list --------------===//
 //
 //  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 //  See https://llvm.org/LICENSE.txt for license information.
 //  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines the builtins for RISC-V V-extension. See:
 //
 //     https://github.com/riscv/rvv-intrinsic-doc
 //
 //===----------------------------------------------------------------------===//

 //===----------------------------------------------------------------------===//
 // Instruction definitions
 //===----------------------------------------------------------------------===//
 // Each record of the class RVVBuiltin defines a collection of builtins (i.e.
 // "def vadd : RVVBuiltin" will be used to define things like "vadd_vv_i32m1",
 // "vadd_vv_i32m2", etc).
 //
 // The elements of this collection are defined by an instantiation process the
 // range of which is specified by the cross product of the LMUL attribute and
 // every element in the attribute TypeRange. By default builtins have LMUL = [1,
 // 2, 4, 8, 1/2, 1/4, 1/8] so the process is repeated 7 times. In tablegen we
 // use the Log2LMUL [0, 1, 2, 3, -1, -2, -3] to represent the LMUL.
 //
 // LMUL represents the fact that the types of values used by that builtin are
 // values generated by instructions that are executed under that LMUL. However,
 // this does not mean the builtin is necessarily lowered into an instruction
 // that executes under the specified LMUL. An example where this happens are
 // loads and stores of masks. A mask like `vbool8_t` can be generated, for
 // instance, by comparing two `__rvv_int8m1_t` (this is LMUL=1) or comparing two
 // `__rvv_int16m2_t` (this is LMUL=2). The actual load or store, however, will
 // be performed under LMUL=1 because mask registers are not grouped.
 //
 // TypeRange is a non-empty sequence of basic types:
 //
 //   c: int8_t (i8)
 //   s: int16_t (i16)
 //   i: int32_t (i32)
 //   l: int64_t (i64)
 //   h: float16_t (half)
 //   f: float32_t (float)
 //   d: float64_t (double)
 //
 // This way, given an LMUL, a record with a TypeRange "sil" will cause the
 // definition of 3 builtins. Each type "t" in the TypeRange (in this example
 // they are int16_t, int32_t, int64_t) is used as a parameter that drives the
 // definition of that particular builtin (for the given LMUL).
 //
 // During the instantiation, types can be transformed or modified using type
 // transformers. Given a type "t" the following primitive type transformers can
 // be applied to it to yield another type.
 //
 //   e: type of "t" as is (identity)
 //   v: computes a vector type whose element type is "t" for the current LMUL
 //   w: computes a vector type identical to what 'v' computes except for the
 //      element type which is twice as wide as the element type of 'v'
 //   q: computes a vector type identical to what 'v' computes except for the
 //      element type which is four times as wide as the element type of 'v'
 //   o: computes a vector type identical to what 'v' computes except for the
 //      element type which is eight times as wide as the element type of 'v'
 //   m: computes a vector type identical to what 'v' computes except for the
 //      element type which is bool
 //   0: void type, ignores "t"
 //   z: size_t, ignores "t"
 //   t: ptrdiff_t, ignores "t"
 //   c: uint8_t, ignores "t"
 //
 // So for instance if t is "i", i.e. int, then "e" will yield int again. "v"
 // will yield an RVV vector type (assume LMUL=1), so __rvv_int32m1_t.
 // Accordingly "w" would yield __rvv_int64m2_t.
 //
 // A type transformer can be prefixed by other non-primitive type transformers.
 //
 //   P: constructs a pointer to the current type
 //   C: adds const to the type
 //   K: requires the integer type to be a constant expression
 //   U: given an integer type or vector type, computes its unsigned variant
 //   I: given a vector type, compute the vector type with integer type
 //      elements of the same width
 //   F: given a vector type, compute the vector type with floating-point type
 //      elements of the same width
 //   S: given a vector type, computes its equivalent one for LMUL=1. This is a
 //      no-op if the vector was already LMUL=1
 //   (Log2EEW:Value): Value is 3, 4, 5 or 6, selecting EEW = 8, 16, 32 or 64.
 //      Given a vector type (SEW and LMUL), computes the equivalent integer
 //      vector type with that EEW and the corresponding ELMUL
 //      (elmul = (eew/sew) * lmul). For example, if the vector type is
 //      __rvv_float16m4_t (SEW=16, LMUL=4) and Log2EEW is 3 (EEW=8), the
 //      equivalent vector type is __rvv_uint8m2_t (elmul = (8/16)*4 = 2). No
 //      builtin is defined when the equivalent type would have an illegal LMUL.
 //
 // Following with the example above, if t is "i", then "Ue" will yield unsigned
 // int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would
 // yield __rvv_float64m2_t, etc.
 //
 // Each builtin is then defined by applying each type in TypeRange against the
 // sequence of type transformers described in Suffix and Prototype.
 //
 // The name of the builtin is the Name attribute (which defaults to NAME, the
 // name of the record being defined) followed by an underscore and the
 // instantiated Suffix attribute. For instance, with Name = "foo",
 // Suffix = "v" and TypeRange = "il", the builtins generated will be
 // __builtin_rvv_foo_i32m1 and __builtin_rvv_foo_i64m1 (under LMUL=1). If
 // Suffix contains more than one type transformer (say "vv"), the
 // instantiated types are separated with underscores, as in
 // "__builtin_rvv_foo_i32m1_i32m1".
 //
 // The C/C++ prototype of the builtin is defined by the Prototype attribute.
 // Prototype is a non-empty sequence of type transformers, the first of which
 // is the return type of the builtin and the rest are the parameters of the
 // builtin, in order. For instance if Prototype is "wvv" and TypeRange is "si"
 // a first builtin will have type
 // __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and the second builtin
 // will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t) (again
 // under LMUL=1).
 //
 // There are a number of attributes that are used to constrain the number and
 // shape of the builtins generated. Refer to the comments below for them.
 class RVVBuiltin<string suffix, string prototype, string type_range,
                  string managed_suffix = ""> {
   // NOTE(review): `managed_suffix` is accepted but never referenced in this
   // class body; it may have been meant as "mangled_suffix" -- confirm before
   // relying on it.

   //--- Naming ---------------------------------------------------------------

   // Stem of the builtin name: it is prefixed with __builtin_rvv_ and followed
   // by the instantiated Suffix.
   string Name = NAME;

   // Sequence of type transformers, instantiated the same way as Prototype.
   // When non-empty, the result is appended to the name after an underscore.
   string Suffix = suffix;

   // Mangled name for special naming cases. When left empty, it defaults to
   // the part of `Name` that ends at the first '_' (e.g. `vadd` for
   // `vadd_vv`).
   string MangledName = "";

   //--- Types ----------------------------------------------------------------

   // Basic types the builtin is instantiated for; one variant per type.
   string TypeRange = type_range;

   // C/C++ prototype as a non-empty sequence of type transformers: the first
   // one gives the return type, the remaining ones give the parameters, in
   // order. It is instantiated with every type of TypeRange and every LMUL.
   // E.g. with Prototype "wvv", TypeRange "si" and LMUL=1, the two builtins
   // have types __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and
   // __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t).
   string Prototype = prototype;

   //--- Shape flags ----------------------------------------------------------

   // Whether a masked form of the builtin exists.
   bit HasMask = true;

   // Only meaningful when HasMask is set: the masked form takes a maskedoff
   // operand, always as the first operand of both the builtin and the IR
   // intrinsic.
   bit HasMaskedOffOperand = true;

   // Whether the builtin takes a vector length parameter, in the last
   // position.
   bit HasVL = true;

   // Whether the builtin supports function overloading and has a mangled name.
   bit HasGeneric = true;

   // Whether the builtin reads or writes "memory" or has other side effects.
   bit HasSideEffects = false;

   // Log2 of every LMUL this builtin is valid for.
   list<int> Log2LMUL = [0, 1, 2, 3, -1, -2, -3];

   //--- Code generation ------------------------------------------------------

   // Manual clang codegen (riscv_vector_builtin_cg.inc). Judging by the names,
   // the second one applies to the masked form.
   code ManualCodegen = [{}];
   code ManualCodegenMask = [{}];

   // Used by the automatic clang codegen to obtain the specific LLVM
   // intrinsic: each entry names a type, -1 being the return type and k >= 0
   // the k-th operand (zero-based) of the codegen'd parameters of the unmasked
   // version. k must not be the position of the mask operand.
   list<int> IntrinsicTypes = [];

   // Permutation mapping the operands of the clang builtin to those of the
   // C/C++ API, for when the two orders differ. It covers the parameters of
   // the unmasked version, excluding the VL operand. Empty means the default
   // permutation [0, 1, 2, ...].
   list<int> PermuteOperands = [];

   // ID of the LLVM intrinsic to lower to (when not empty).
   string IRName = NAME;

   // ID of the LLVM intrinsic to lower to when HasMask is set.
   string IRNameMask = NAME # "_mask";

   // Code emitted in the header. When empty, a definition is emitted
   // automatically instead.
   string HeaderCode = "";
 }

 //===----------------------------------------------------------------------===//
 // Basic classes with automatic codegen.
 //===----------------------------------------------------------------------===//

 // RVVBuiltin specialisation that sets no manual codegen of its own; its
 // IntrinsicTypes are the return type (-1) and operand 1.
 class RVVBinBuiltin<string suffix,
                     string prototype,
                     string type_range>
   : RVVBuiltin<suffix, prototype, type_range> {
   let IntrinsicTypes = [-1, 1];
 }

 // Defines one RVVBinBuiltin per entry of `suffixes_prototypes`. Every entry
 // is a triple [name suffix, Suffix, Prototype]: the first element is appended
 // to NAME (after an underscore) to form the builtin Name, the other two are
 // forwarded to RVVBinBuiltin together with `type_range`. IRName is set to
 // `intrinsic_name` and IRNameMask to `intrinsic_name` followed by "_mask".
 multiclass RVVBinBuiltinSet<string intrinsic_name, string type_range,
                             list<list<string>> suffixes_prototypes> {
   foreach entry = suffixes_prototypes in {
     let Name = NAME # "_" # entry[0],
         IRName = intrinsic_name,
         IRNameMask = intrinsic_name # "_mask" in
     def : RVVBinBuiltin<entry[1], entry[2], type_range>;
   }
 }

 // Basic type letters (same letters as used in TypeRange).
 defvar TypeList = ["c", "s", "i", "l", "f", "d"];

 // Pairs of [EEW in bits, matching (Log2EEW:n) type transformer].
 defvar EEWList = [
   ["8",  "(Log2EEW:3)"],
   ["16", "(Log2EEW:4)"],
   ["32", "(Log2EEW:5)"],
   ["64", "(Log2EEW:6)"]
 ];

 // Predicate on a basic type letter: `val` is set when `type` is one of the
 // floating-point letters "h", "f" or "d".
 class IsFloat<string type> {
   bit val = !or(!eq(type, "h"),
                 !or(!eq(type, "f"), !eq(type, "d")));
 }

 // Load builtins lowered to the "vle" / "vle_mask" intrinsics. For every basic
 // type in `types` a builtin returning the vector type and taking a pointer to
 // const element is defined ("vPCe"); non-floating-point types additionally
 // get the unsigned variant ("UvPCUe").
 multiclass RVVVLEBuiltin<list<string> types> {
   let HasGeneric = false,
       Name = NAME # "_v",
       IRName = "vle",
       IRNameMask = "vle_mask",
       // Unmasked form: Ops[0] is bitcast to a pointer to the result type.
       ManualCodegen = [{
         IntrinsicTypes = {ResultType, Ops[1]->getType()};
         Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
       }],
       // Masked form: the same bitcast, applied to Ops[1].
       ManualCodegenMask = [{
         IntrinsicTypes = {ResultType, Ops[3]->getType()};
         Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
       }] in {
     foreach elt = types in {
       def : RVVBuiltin<"v", "vPCe", elt>;
       // The unsigned variant is skipped for floating-point types.
       if !not(IsFloat<elt>.val) then {
         def : RVVBuiltin<"Uv", "UvPCUe", elt>;
       }
     }
   }
 }

 // Indexed load builtins lowered to `op` / `op`_mask. For every basic type in
 // TypeList and every entry of EEWList a builtin named `op`<eew>_v is defined,
 // taking a pointer to const element and an unsigned index vector of that EEW;
 // non-floating-point types additionally get the unsigned variant.
 multiclass RVVIndexedLoad<string op> {
   let HasGeneric = false,
       // Unmasked form: Ops[0] is bitcast to a pointer to the result type.
       ManualCodegen = [{
         IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()};
         Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
       }],
       // Masked form: the same bitcast, applied to Ops[1].
       ManualCodegenMask = [{
         IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()};
         Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
       }] in {
     foreach elt = TypeList in {
       foreach eew_entry = EEWList in {
         // eew_entry is [EEW in bits, (Log2EEW:n) transformer]; the index
         // operand is the unsigned vector produced by that transformer.
         defvar index_vec = eew_entry[1] # "Uv";
         let Name = op # eew_entry[0] # "_v",
             IRName = op,
             IRNameMask = op # "_mask" in {
           def : RVVBuiltin<"v", "vPCe" # index_vec, elt>;
           if !not(IsFloat<elt>.val) then {
             def : RVVBuiltin<"Uv", "UvPCUe" # index_vec, elt>;
           }
         }
       }
     }
   }
 }

 // Store builtins lowered to the "vse" / "vse_mask" intrinsics. For every
 // basic type in `types` a void builtin taking the vector value and a pointer
 // to element is defined ("0vPe"); non-floating-point types additionally get
 // the unsigned variant ("0UvPUe"). The masked form has no maskedoff operand.
 multiclass RVVVSEBuiltin<list<string> types> {
   let HasGeneric = false,
       HasMaskedOffOperand = false,
       Name = NAME # "_v",
       IRName = "vse",
       IRNameMask = "vse_mask",
       // C/C++ Operand: (ptr, value, vl). Builtin: (value, ptr, vl)
       PermuteOperands = [1, 0],
       // Both forms bitcast Ops[1] to a pointer to the type of Ops[0].
       ManualCodegen = [{
         Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo());
         IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType()};
       }],
       ManualCodegenMask = [{
         Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo());
         IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()};
       }] in {
     foreach elt = types in {
       def : RVVBuiltin<"v", "0vPe", elt>;
       // The unsigned variant is skipped for floating-point types.
       if !not(IsFloat<elt>.val) then {
         def : RVVBuiltin<"Uv", "0UvPUe", elt>;
       }
     }
   }
 }

 // 6. Configuration-Setting Instructions
 // 6.1. vsetvli/vsetvl instructions
 // Settings shared by the two builtins defined in this block: no VL operand,
 // no masked form, no overloaded form, a single instantiation
 // (Log2LMUL = [0]) and manual codegen whose only intrinsic type is the
 // result type.
 let HasVL = false,
     HasMask = false,
     HasSideEffects = true,
     HasGeneric = false,
     Log2LMUL = [0],
     ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type
 {
   // vsetvl is provided as macros because the builtin requires its SEW and
   // LMUL arguments to be integer constant expressions (the "Kz" parameters of
   // the prototype). In every macro the second argument is 0/1/2/3 for
   // e8/e16/e32/e64, and the third one is 0/1/2/3 for m1/m2/m4/m8 and 5/6/7
   // for mf8/mf4/mf2.
   let HeaderCode =
 [{
 #define vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5)
 #define vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6)
 #define vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7)
 #define vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0)
 #define vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1)
 #define vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2)
 #define vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3)

 #define vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6)
 #define vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7)
 #define vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0)
 #define vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1)
 #define vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2)
 #define vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3)

 #define vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7)
 #define vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0)
 #define vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1)
 #define vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2)
 #define vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3)

 #define vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0)
 #define vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1)
 #define vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2)
 #define vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3)

 }] in
   // Prototype "zzKzKz": returns size_t and takes a size_t followed by two
   // constant-expression size_t arguments.
   def vsetvli : RVVBuiltin<"", "zzKzKz", "i">;

   // vsetvlmax macros: same two constant arguments as the vsetvl macros, but
   // without the avl operand.
   let HeaderCode =
 [{
 #define vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5)
 #define vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6)
 #define vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7)
 #define vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0)
 #define vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1)
 #define vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2)
 #define vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3)

 #define vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6)
 #define vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7)
 #define vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0)
 #define vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1)
 #define vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2)
 #define vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3)

 #define vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7)
 #define vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0)
 #define vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1)
 #define vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2)
 #define vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3)

 #define vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0)
 #define vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1)
 #define vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2)
 #define vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3)

 }] in
   // Prototype "zKzKz": returns size_t and takes two constant-expression
   // size_t arguments.
   def vsetvlimax : RVVBuiltin<"", "zKzKz", "i">;
 }

 // 7. Vector Loads and Stores
 // 7.4. Vector Unit-Stride Instructions
 // One defm per load name, each listing the basic types it is defined for.
 defm vle8  : RVVVLEBuiltin<["c"]>;
 defm vle16 : RVVVLEBuiltin<["s"]>;
 defm vle32 : RVVVLEBuiltin<["i", "f"]>;
 defm vle64 : RVVVLEBuiltin<["l", "d"]>;

 // Stores, with the same type lists as the loads.
 defm vse8  : RVVVSEBuiltin<["c"]>;
 defm vse16 : RVVVSEBuiltin<["s"]>;
 defm vse32 : RVVVSEBuiltin<["i", "f"]>;
 defm vse64 : RVVVSEBuiltin<["l", "d"]>;

 // 7.6. Vector Indexed Instructions
 defm : RVVIndexedLoad<"vluxei">;
 defm : RVVIndexedLoad<"vloxei">;

 // 12. Vector Integer Arithmetic Instructions
 // 12.1. Vector Single-Width Integer Add and Subtract
 // Each entry is [name suffix, Suffix, Prototype].
 defm vadd : RVVBinBuiltinSet<"vadd", "csil", [
   ["vv", "v",  "vvv"],
   ["vx", "v",  "vve"],
   ["vv", "Uv", "UvUvUv"],
   ["vx", "Uv", "UvUvUe"]
 ]>;

 // 14. Vector Floating-Point Instructions
 // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
 defm vfadd : RVVBinBuiltinSet<"vfadd", "fd", [
   ["vv", "v", "vvv"],
   ["vf", "v", "vve"]
 ]>;
	//==--- riscv_vector.td - RISC-V V-ext Builtin function list --------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file defines the builtins for RISC-V V-extension. See:
	//
	// https://github.com/riscv/rvv-intrinsic-doc
	//
	//===----------------------------------------------------------------------===//

	//===----------------------------------------------------------------------===//
	// Instruction definitions
	//===----------------------------------------------------------------------===//
	// Each record of the class RVVBuiltin defines a collection of builtins (i.e.
	// "def vadd : RVVBuiltin" will be used to define things like "vadd_vv_i32m1",
	// "vadd_vv_i32m2", etc).
	//
	// The elements of this collection are defined by an instantiation process the
	// range of which is specified by the cross product of the LMUL attribute and
	// every element in the attribute TypeRange. By default builtins have LMUL = [1,
	// 2, 4, 8, 1/2, 1/4, 1/8] so the process is repeated 7 times. In tablegen we
	// use the Log2LMUL [0, 1, 2, 3, -1, -2, -3] to represent the LMUL.
	//
	// LMUL represents the fact that the types of values used by that builtin are
	// values generated by instructions that are executed under that LMUL. However,
	// this does not mean the builtin is necessarily lowered into an instruction
	// that executes under the specified LMUL. An example where this happens are
	// loads and stores of masks. A mask like `vbool8_t` can be generated, for
	// instance, by comparing two `__rvv_int8m1_t` (this is LMUL=1) or comparing two
	// `__rvv_int16m2_t` (this is LMUL=2). The actual load or store, however, will
	// be performed under LMUL=1 because mask registers are not grouped.
	//
	// TypeRange is a non-empty sequence of basic types:
	//
	// c: int8_t (i8)
	// s: int16_t (i16)
	// i: int32_t (i32)
	// l: int64_t (i64)
	// h: float16_t (half)
	// f: float32_t (float)
	// d: float64_t (double)
	//
	// This way, given an LMUL, a record with a TypeRange "sil" will cause the
	// definition of 3 builtins. Each type "t" in the TypeRange (in this example
	// they are int16_t, int32_t, int64_t) is used as a parameter that drives the
	// definition of that particular builtin (for the given LMUL).
	//
	// During the instantiation, types can be transformed or modified using type
	// transformers. Given a type "t" the following primitive type transformers can
	// be applied to it to yield another type.
	//
	// e: type of "t" as is (identity)
	// v: computes a vector type whose element type is "t" for the current LMUL
	// w: computes a vector type identical to what 'v' computes except for the
	// element type which is twice as wide as the element type of 'v'
	// q: computes a vector type identical to what 'v' computes except for the
	// element type which is four times as wide as the element type of 'v'
	// o: computes a vector type identical to what 'v' computes except for the
	// element type which is eight times as wide as the element type of 'v'
	// m: computes a vector type identical to what 'v' computes except for the
	// element type which is bool
	// 0: void type, ignores "t"
	// z: size_t, ignores "t"
	// t: ptrdiff_t, ignores "t"
	// c: uint8_t, ignores "t"
	//
	// So for instance if t is "i", i.e. int, then "e" will yield int again. "v"
	// will yield an RVV vector type (assume LMUL=1), so __rvv_int32m1_t.
	// Accordingly "w" would yield __rvv_int64m2_t.
	//
	// A type transformer can be prefixed by other non-primitive type transformers.
	//
	// P: constructs a pointer to the current type
	// C: adds const to the type
	// K: requires the integer type to be a constant expression
	// U: given an integer type or vector type, computes its unsigned variant
	// I: given a vector type, compute the vector type with integer type
	// elements of the same width
	// F: given a vector type, compute the vector type with floating-point type
	// elements of the same width
	// S: given a vector type, computes its equivalent one for LMUL=1. This is a
	// no-op if the vector was already LMUL=1
	// (Log2EEW:Value): Value is 3, 4, 5 or 6, selecting EEW = 8, 16, 32 or 64.
	// Given a vector type (SEW and LMUL), computes the equivalent integer
	// vector type with that EEW and the corresponding ELMUL
	// (elmul = (eew/sew) * lmul). For example, if the vector type is
	// __rvv_float16m4_t (SEW=16, LMUL=4) and Log2EEW is 3 (EEW=8), the
	// equivalent vector type is __rvv_uint8m2_t (elmul = (8/16)*4 = 2). No
	// builtin is defined when the equivalent type would have an illegal LMUL.
	//
	// Following with the example above, if t is "i", then "Ue" will yield unsigned
	// int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would
	// yield __rvv_float64m2_t, etc.
	//
	// Each builtin is then defined by applying each type in TypeRange against the
	// sequence of type transformers described in Suffix and Prototype.
	//
	// The name of the builtin is the Name attribute (which defaults to NAME, the
	// name of the record being defined) followed by an underscore and the
	// instantiated Suffix attribute. For instance, with Name = "foo",
	// Suffix = "v" and TypeRange = "il", the builtins generated will be
	// __builtin_rvv_foo_i32m1 and __builtin_rvv_foo_i64m1 (under LMUL=1). If
	// Suffix contains more than one type transformer (say "vv"), the
	// instantiated types are separated with underscores, as in
	// "__builtin_rvv_foo_i32m1_i32m1".
	//
	// The C/C++ prototype of the builtin is defined by the Prototype attribute.
	// Prototype is a non-empty sequence of type transformers, the first of which
	// is the return type of the builtin and the rest are the parameters of the
	// builtin, in order. For instance if Prototype is "wvv" and TypeRange is "si"
	// a first builtin will have type
	// __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and the second builtin
	// will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t) (again
	// under LMUL=1).
	//
	// There are a number of attributes that are used to constrain the number and
	// shape of the builtins generated. Refer to the comments below for them.
	// NOTE(review): a class with this same name and the same body is already
	// defined earlier in this file (the file content appears twice). TableGen is
	// not expected to accept a second definition of a class, so one of the two
	// copies presumably has to be removed -- confirm.
	class RVVBuiltin<string suffix, string prototype, string type_range,
	                 string managed_suffix = ""> {
	  // NOTE(review): `managed_suffix` is accepted but never referenced in this
	  // class body; it may have been meant as "mangled_suffix" -- confirm.

	  // Stem of the builtin name: it is prefixed with __builtin_rvv_ and followed
	  // by the instantiated Suffix.
	  string Name = NAME;

	  // Sequence of type transformers, instantiated the same way as Prototype.
	  // When non-empty, the result is appended to the name after an underscore.
	  string Suffix = suffix;

	  // Mangled name for special naming cases. When left empty, it defaults to
	  // the part of `Name` that ends at the first '_' (e.g. `vadd` for
	  // `vadd_vv`).
	  string MangledName = "";

	  // Basic types the builtin is instantiated for; one variant per type.
	  string TypeRange = type_range;

	  // C/C++ prototype as a non-empty sequence of type transformers: the first
	  // one gives the return type, the remaining ones give the parameters, in
	  // order. It is instantiated with every type of TypeRange and every LMUL.
	  // E.g. with Prototype "wvv", TypeRange "si" and LMUL=1, the two builtins
	  // have types __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and
	  // __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t).
	  string Prototype = prototype;

	  // Whether a masked form of the builtin exists.
	  bit HasMask = true;

	  // Only meaningful when HasMask is set: the masked form takes a maskedoff
	  // operand, always as the first operand of both the builtin and the IR
	  // intrinsic.
	  bit HasMaskedOffOperand = true;

	  // Whether the builtin takes a vector length parameter, in the last
	  // position.
	  bit HasVL = true;

	  // Whether the builtin supports function overloading and has a mangled name.
	  bit HasGeneric = true;

	  // Whether the builtin reads or writes "memory" or has other side effects.
	  bit HasSideEffects = false;

	  // Log2 of every LMUL this builtin is valid for.
	  list<int> Log2LMUL = [0, 1, 2, 3, -1, -2, -3];

	  // Manual clang codegen (riscv_vector_builtin_cg.inc). Judging by the names,
	  // the second one applies to the masked form.
	  code ManualCodegen = [{}];
	  code ManualCodegenMask = [{}];

	  // Used by the automatic clang codegen to obtain the specific LLVM
	  // intrinsic: each entry names a type, -1 being the return type and k >= 0
	  // the k-th operand (zero-based) of the codegen'd parameters of the unmasked
	  // version. k must not be the position of the mask operand.
	  list<int> IntrinsicTypes = [];

	  // Permutation mapping the operands of the clang builtin to those of the
	  // C/C++ API, for when the two orders differ. It covers the parameters of
	  // the unmasked version, excluding the VL operand. Empty means the default
	  // permutation [0, 1, 2, ...].
	  list<int> PermuteOperands = [];

	  // ID of the LLVM intrinsic to lower to (when not empty).
	  string IRName = NAME;

	  // ID of the LLVM intrinsic to lower to when HasMask is set.
	  string IRNameMask = NAME # "_mask";

	  // Code emitted in the header. When empty, a definition is emitted
	  // automatically instead.
	  string HeaderCode = "";
	}

	//===----------------------------------------------------------------------===//
	// Basic classes with automatic codegen.
	//===----------------------------------------------------------------------===//

	// RVVBuiltin specialisation that sets no manual codegen of its own; its
	// IntrinsicTypes are the return type (-1) and operand 1.
	class RVVBinBuiltin<string suffix,
	                    string prototype,
	                    string type_range>
	  : RVVBuiltin<suffix, prototype, type_range> {
	  let IntrinsicTypes = [-1, 1];
	}

	// Defines one RVVBinBuiltin per entry of `suffixes_prototypes`. Every entry
	// is a triple [name suffix, Suffix, Prototype]: the first element is appended
	// to NAME (after an underscore) to form the builtin Name, the other two are
	// forwarded to RVVBinBuiltin together with `type_range`. IRName is set to
	// `intrinsic_name` and IRNameMask to `intrinsic_name` followed by "_mask".
	multiclass RVVBinBuiltinSet<string intrinsic_name, string type_range,
	                            list<list<string>> suffixes_prototypes> {
	  foreach entry = suffixes_prototypes in {
	    let Name = NAME # "_" # entry[0],
	        IRName = intrinsic_name,
	        IRNameMask = intrinsic_name # "_mask" in
	    def : RVVBinBuiltin<entry[1], entry[2], type_range>;
	  }
	}

	// Basic type letters (same letters as used in TypeRange).
	defvar TypeList = ["c", "s", "i", "l", "f", "d"];

	// Pairs of [EEW in bits, matching (Log2EEW:n) type transformer].
	defvar EEWList = [
	  ["8",  "(Log2EEW:3)"],
	  ["16", "(Log2EEW:4)"],
	  ["32", "(Log2EEW:5)"],
	  ["64", "(Log2EEW:6)"]
	];

	// Predicate on a basic type letter: `val` is set when `type` is one of the
	// floating-point letters "h", "f" or "d".
	class IsFloat<string type> {
	  bit val = !or(!eq(type, "h"),
	                !or(!eq(type, "f"), !eq(type, "d")));
	}

	// Load builtins lowered to the "vle" / "vle_mask" intrinsics. For every basic
	// type in `types` a builtin returning the vector type and taking a pointer to
	// const element is defined ("vPCe"); non-floating-point types additionally
	// get the unsigned variant ("UvPCUe").
	multiclass RVVVLEBuiltin<list<string> types> {
	let Name = NAME # "_v",
	IRName = "vle",
	IRNameMask ="vle_mask",
	HasGeneric = false,
	// Unmasked form: Ops[0] is bitcast to a pointer to the result type; the
	// intrinsic types are the result type and the type of Ops[1].
	ManualCodegen = [{
	IntrinsicTypes = {ResultType, Ops[1]->getType()};
	Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
	}],
	// Masked form: the same bitcast applied to Ops[1]; the second intrinsic
	// type comes from Ops[3].
	ManualCodegenMask= [{
	IntrinsicTypes = {ResultType, Ops[3]->getType()};
	Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
	}] in {
	foreach type = types in {
	def : RVVBuiltin<"v", "vPCe", type>;
	// The unsigned variant is skipped for floating-point types.
	if !not(IsFloat<type>.val) then {
	def : RVVBuiltin<"Uv", "UvPCUe", type>;
	}
	}
	}
	}

	// Indexed load builtins lowered to `op` / `op`_mask. For every basic type in
	// TypeList and every entry of EEWList a builtin named `op`<eew>_v is defined,
	// taking a pointer to const element and an unsigned index vector of that EEW;
	// non-floating-point types additionally get the unsigned variant.
	multiclass RVVIndexedLoad<string op> {
	let HasGeneric = false,
	// Unmasked form: Ops[0] is bitcast to a pointer to the result type; the
	// intrinsic types are the result type and the types of Ops[1] and Ops[2].
	ManualCodegen = [{
	IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()};
	Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
	}],
	// Masked form: the same bitcast applied to Ops[1]; the other intrinsic
	// types come from Ops[2] and Ops[4].
	ManualCodegenMask = [{
	IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()};
	Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
	}] in {
	foreach type = TypeList in {
	foreach eew_list = EEWList in {
	// eew_list is [EEW in bits, (Log2EEW:n) transformer].
	defvar eew = eew_list[0];
	defvar eew_type = eew_list[1];
	let Name = op # eew # "_v", IRName = op, IRNameMask = op # "_mask" in {
	def: RVVBuiltin<"v", "vPCe" # eew_type # "Uv", type>;
	// The unsigned variant is skipped for floating-point types.
	if !not(IsFloat<type>.val) then {
	def: RVVBuiltin<"Uv", "UvPCUe" # eew_type # "Uv", type>;
	}
	}
	}
	}
	}
	}

	// Store builtins lowered to the "vse" / "vse_mask" intrinsics. For every
	// basic type in `types` a void builtin taking the vector value and a pointer
	// to element is defined ("0vPe"); non-floating-point types additionally get
	// the unsigned variant ("0UvPUe"). The masked form has no maskedoff operand.
	multiclass RVVVSEBuiltin<list<string> types> {
	let Name = NAME # "_v",
	IRName = "vse",
	IRNameMask = "vse_mask",
	HasMaskedOffOperand = false,
	PermuteOperands = [1, 0], // C/C++ Operand: (ptr, value, vl). Builtin: (value, ptr, vl)
	HasGeneric = false,
	// Both forms bitcast Ops[1] to a pointer to the type of Ops[0]; they differ
	// only in which operand supplies the second intrinsic type.
	ManualCodegen = [{
	Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo());
	IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType()};
	}],
	ManualCodegenMask= [{
	Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo());
	IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()};
	}] in {
	foreach type = types in {
	def : RVVBuiltin<"v", "0vPe", type>;
	// The unsigned variant is skipped for floating-point types.
	if !not(IsFloat<type>.val) then {
	def : RVVBuiltin<"Uv", "0UvPUe", type>;
	}
	}
	}
	}

	// 6. Configuration-Setting Instructions
	// 6.1. vsetvli/vsetvl instructions
	// Settings shared by the two builtins defined in this block: no VL operand,
	// no masked form, no overloaded form, a single instantiation
	// (Log2LMUL = [0]) and manual codegen whose only intrinsic type is the
	// result type.
	let HasVL = false,
	HasMask = false,
	HasSideEffects = true,
	HasGeneric = false,
	Log2LMUL = [0],
	ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type
	{
	// vsetvl is provided as macros because the builtin requires its SEW and
	// LMUL arguments to be integer constant expressions (the "Kz" parameters of
	// the prototype). In every macro the second argument is 0/1/2/3 for
	// e8/e16/e32/e64, and the third one is 0/1/2/3 for m1/m2/m4/m8 and 5/6/7
	// for mf8/mf4/mf2.
	let HeaderCode =
	[{
	#define vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5)
	#define vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6)
	#define vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7)
	#define vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0)
	#define vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1)
	#define vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2)
	#define vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3)

	#define vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6)
	#define vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7)
	#define vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0)
	#define vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1)
	#define vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2)
	#define vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3)

	#define vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7)
	#define vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0)
	#define vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1)
	#define vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2)
	#define vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3)

	#define vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0)
	#define vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1)
	#define vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2)
	#define vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3)

	}] in
	// Prototype "zzKzKz": returns size_t and takes a size_t followed by two
	// constant-expression size_t arguments.
	def vsetvli : RVVBuiltin<"", "zzKzKz", "i">;

	// vsetvlmax macros: same two constant arguments as the vsetvl macros, but
	// without the avl operand.
	let HeaderCode =
	[{
	#define vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5)
	#define vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6)
	#define vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7)
	#define vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0)
	#define vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1)
	#define vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2)
	#define vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3)

	#define vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6)
	#define vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7)
	#define vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0)
	#define vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1)
	#define vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2)
	#define vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3)

	#define vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7)
	#define vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0)
	#define vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1)
	#define vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2)
	#define vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3)

	#define vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0)
	#define vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1)
	#define vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2)
	#define vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3)

	}] in
	// Prototype "zKzKz": returns size_t and takes two constant-expression
	// size_t arguments.
	def vsetvlimax : RVVBuiltin<"", "zKzKz", "i">;
	}

	// 7. Vector Loads and Stores
	// 7.4. Vector Unit-Stride Instructions
	// One defm per load name, each listing the basic types it is defined for.
	defm vle8  : RVVVLEBuiltin<["c"]>;
	defm vle16 : RVVVLEBuiltin<["s"]>;
	defm vle32 : RVVVLEBuiltin<["i", "f"]>;
	defm vle64 : RVVVLEBuiltin<["l", "d"]>;

	// Stores, with the same type lists as the loads.
	defm vse8  : RVVVSEBuiltin<["c"]>;
	defm vse16 : RVVVSEBuiltin<["s"]>;
	defm vse32 : RVVVSEBuiltin<["i", "f"]>;
	defm vse64 : RVVVSEBuiltin<["l", "d"]>;

	// 7.6. Vector Indexed Instructions
	defm : RVVIndexedLoad<"vluxei">;
	defm : RVVIndexedLoad<"vloxei">;

	// 12. Vector Integer Arithmetic Instructions
	// 12.1. Vector Single-Width Integer Add and Subtract
	// Each entry is [name suffix, Suffix, Prototype].
	defm vadd : RVVBinBuiltinSet<"vadd", "csil", [
	  ["vv", "v",  "vvv"],
	  ["vx", "v",  "vve"],
	  ["vv", "Uv", "UvUvUv"],
	  ["vx", "Uv", "UvUvUe"]
	]>;

	// 14. Vector Floating-Point Instructions
	// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
	defm vfadd : RVVBinBuiltinSet<"vfadd", "fd", [
	  ["vv", "v", "vvv"],
	  ["vf", "v", "vve"]
	]>;