| //==--- riscv_vector.td - RISC-V V-ext Builtin function list --------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the builtins for RISC-V V-extension. See: |
| // |
| // https://github.com/riscv/rvv-intrinsic-doc |
| // |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| // Instruction definitions |
| //===----------------------------------------------------------------------===// |
| // Each record of the class RVVBuiltin defines a collection of builtins (i.e. |
| // "def vadd : RVVBuiltin" will be used to define things like "vadd_vv_i32m1", |
| // "vadd_vv_i32m2", etc). |
| // |
| // The elements of this collection are defined by an instantiation process the |
| // range of which is specified by the cross product of the LMUL attribute and |
| // every element in the attribute TypeRange. By default builtins have LMUL = [1, |
| // 2, 4, 8, 1/2, 1/4, 1/8] so the process is repeated 7 times. In tablegen we |
| // use the Log2LMUL [0, 1, 2, 3, -1, -2, -3] to represent the LMUL. |
| // |
| // LMUL represents the fact that the types of values used by that builtin are |
| // values generated by instructions that are executed under that LMUL. However, |
| // this does not mean the builtin is necessarily lowered into an instruction |
| // that executes under the specified LMUL. An example where this happens is |
| // loads and stores of masks. A mask like `vbool8_t` can be generated, for |
| // instance, by comparing two `__rvv_int8m1_t` (this is LMUL=1) or comparing two |
| // `__rvv_int16m2_t` (this is LMUL=2). The actual load or store, however, will |
| // be performed under LMUL=1 because mask registers are not grouped. |
| // |
| // TypeRange is a non-empty sequence of basic types: |
| // |
| // c: int8_t (i8) |
| // s: int16_t (i16) |
| // i: int32_t (i32) |
| // l: int64_t (i64) |
| // h: float16_t (half) |
| // f: float32_t (float) |
| // d: float64_t (double) |
| // |
| // This way, given an LMUL, a record with a TypeRange "sil" will cause the |
| // definition of 3 builtins. Each type "t" in the TypeRange (in this example |
| // they are int16_t, int32_t, int64_t) is used as a parameter that drives the |
| // definition of that particular builtin (for the given LMUL). |
| // |
| // During the instantiation, types can be transformed or modified using type |
| // transformers. Given a type "t" the following primitive type transformers can |
| // be applied to it to yield another type. |
| // |
| // e: type of "t" as is (identity) |
| // v: computes a vector type whose element type is "t" for the current LMUL |
| // w: computes a vector type identical to what 'v' computes except for the |
| // element type which is twice as wide as the element type of 'v' |
| // q: computes a vector type identical to what 'v' computes except for the |
| // element type which is four times as wide as the element type of 'v' |
| // o: computes a vector type identical to what 'v' computes except for the |
| // element type which is eight times as wide as the element type of 'v' |
| // m: computes a vector type identical to what 'v' computes except for the |
| // element type which is bool |
| // 0: void type, ignores "t" |
| // z: size_t, ignores "t" |
| // t: ptrdiff_t, ignores "t" |
| // c: uint8_t, ignores "t" |
| // |
| // So for instance if t is "i", i.e. int, then "e" will yield int again. "v" |
| // will yield an RVV vector type (assume LMUL=1), so __rvv_int32m1_t. |
| // Accordingly "w" would yield __rvv_int64m2_t. |
| // |
| // A type transformer can be prefixed by other non-primitive type transformers. |
| // |
| // P: constructs a pointer to the current type |
| // C: adds const to the type |
| // K: requires the integer type to be a constant expression |
| // U: given an integer type or vector type, computes its unsigned variant |
| // I: given a vector type, compute the vector type with integer type |
| // elements of the same width |
| // F: given a vector type, compute the vector type with floating-point type |
| // elements of the same width |
| // S: given a vector type, computes its equivalent one for LMUL=1. This is a |
| // no-op if the vector was already LMUL=1 |
| // (Log2EEW:Value): Log2EEW value could be 3/4/5/6 (8/16/32/64), given a |
| // vector type (SEW and LMUL) and EEW (8/16/32/64), computes its |
| // equivalent integer vector type with EEW and corresponding ELMUL (elmul = |
| // (eew/sew) * lmul). For example, if the vector type is __rvv_float16m4_t |
| // (SEW=16, LMUL=4) and Log2EEW is 3 (EEW=8), then the equivalent vector |
| // type is __rvv_uint8m2_t (elmul=(8/16)*4 = 2). No builtin is defined if |
| // the equivalent type has an illegal LMUL. |
| // |
| // Following with the example above, if t is "i", then "Ue" will yield unsigned |
| // int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would |
| // yield __rvv_float64m2_t, etc. |
| // |
| // Each builtin is then defined by applying each type in TypeRange against the |
| // sequence of type transformers described in Suffix and Prototype. |
| // |
| // The name of the builtin is defined by the Name attribute (which defaults to |
| // the name of the def) followed by the Suffix attribute, separated by an |
| // underscore. For instance with Name="foo", Suffix = "v" and TypeRange = "il", |
| // the builtin generated will be __builtin_rvv_foo_i32m1 and |
| // __builtin_rvv_foo_i64m1 (under LMUL=1). If Suffix contains more than one |
| // type transformer (say "vv") each of the types is separated with an |
| // underscore as in "__builtin_rvv_foo_i32m1_i32m1". |
| // |
| // The C/C++ prototype of the builtin is defined by the Prototype attribute. |
| // Prototype is a non-empty sequence of type transformers, the first of which |
| // is the return type of the builtin and the rest are the parameters of the |
| // builtin, in order. For instance if Prototype is "wvv" and TypeRange is "si" |
| // a first builtin will have type |
| // __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and the second builtin |
| // will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t) (again |
| // under LMUL=1). |
| // |
| // There are a number of attributes that are used to constrain the number and |
| // shape of the builtins generated. Refer to the comments below for them. |
| // Describes one collection of RVV builtins. Every field below is an attribute |
| // that controls the naming, the prototype or the code generation of the |
| // builtins instantiated from a record of this class. |
| // NOTE(review): the template argument `managed_suffix` is not referenced |
| // anywhere in this class body -- presumably a typo for "mangled"; confirm |
| // its intended use before relying on it. |
| class RVVBuiltin<string suffix, string prototype, string type_range, |
| string managed_suffix = ""> { |
| // Base name of the builtin: it is prefixed with __builtin_rvv_ and followed |
| // by the computed Suffix. |
| string Name = NAME; |
| |
| // If not empty, each instantiated builtin will have this appended after an |
| // underscore (_). It is instantiated like Prototype. |
| string Suffix = suffix; |
| |
| // If empty, the default MangledName is the substring of `Name` that ends at |
| // the first '_'. For example, the default mangled name is `vadd` for Name |
| // `vadd_vv`. It is used to describe some special naming cases. |
| string MangledName = ""; |
| |
| // The different variants of the builtin, parameterised with a type. |
| string TypeRange = type_range; |
| |
| // We use each type described in TypeRange and LMUL with prototype to |
| // instantiate a specific element of the set of builtins being defined. |
| // Prototype attribute defines the C/C++ prototype of the builtin. It is a |
| // non-empty sequence of type transformers, the first of which is the return |
| // type of the builtin and the rest are the parameters of the builtin, in |
| // order. For instance if Prototype is "wvv", TypeRange is "si" and LMUL=1, a |
| // first builtin will have type |
| // __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t), and the second builtin |
| // will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t). |
| string Prototype = prototype; |
| |
| // This builtin has a masked form. |
| bit HasMask = true; |
| |
| // If HasMask, this flag states that this builtin has a maskedoff operand. It |
| // is always the first operand in builtin and IR intrinsic. |
| bit HasMaskedOffOperand = true; |
| |
| // This builtin has a granted vector length parameter in the last position. |
| bit HasVL = true; |
| |
| // This builtin supports function overloading and has a mangled name. |
| bit HasGeneric = true; |
| |
| // Reads or writes "memory" or has other side-effects. |
| bit HasSideEffects = false; |
| |
| // This builtin is valid for the given Log2LMULs. |
| list<int> Log2LMUL = [0, 1, 2, 3, -1, -2, -3]; |
| |
| // Manual code in clang codegen riscv_vector_builtin_cg.inc. Both default to |
| // empty code; ManualCodegenMask is the counterpart used by the records of |
| // this file for the masked form. |
| code ManualCodegen = [{}]; |
| code ManualCodegenMask = [{}]; |
| |
| // When emitting the automatic clang codegen, this describes which types are |
| // used to obtain the specific LLVM intrinsic. -1 means the return type; |
| // otherwise k >= 0 means the k-th operand (counting from zero) of the |
| // codegen'd parameters of the unmasked version. k can't be the mask |
| // operand's position. |
| list<int> IntrinsicTypes = []; |
| |
| // When the order of the parameters of the clang builtin does not match the |
| // order of the C/C++ API, this permutation index maps the operands from the |
| // clang builtin to the C/C++ API. It refers to the parameters of the |
| // unmasked version without the VL operand. If empty, the default |
| // permutation is [0, 1, 2, ...]. |
| list<int> PermuteOperands = []; |
| |
| // If this name is not empty, it is the ID of the LLVM intrinsic we want to |
| // lower to. Defaults to the name of the def. |
| string IRName = NAME; |
| |
| // If HasMask, this is the ID of the LLVM intrinsic we want to lower to. |
| // Defaults to the name of the def followed by "_mask". |
| string IRNameMask = NAME #"_mask"; |
| |
| // If non empty, this is the code emitted in the header, otherwise |
| // an automatic definition in header is emitted. |
| string HeaderCode = ""; |
| |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Basic classes with automatic codegen. |
| //===----------------------------------------------------------------------===// |
| |
| // RVVBuiltin with automatic codegen whose IntrinsicTypes are the return type |
| // (-1) and operand 1 of the unmasked version (see the IntrinsicTypes field of |
| // RVVBuiltin for the meaning of the indices). |
| class RVVBinBuiltin<string suffix, string prototype, string type_range> |
| : RVVBuiltin<suffix, prototype, type_range> { |
| let IntrinsicTypes = [-1, 1]; |
| } |
| |
| // Defines one anonymous RVVBinBuiltin per entry of `suffixes_prototypes`. |
| // Every entry is a triple: element 0 is appended to NAME (after an underscore) |
| // to form the builtin Name, element 1 is the Suffix and element 2 is the |
| // Prototype. All the builtins of the set use `intrinsic_name` as IRName and |
| // `intrinsic_name` followed by "_mask" as IRNameMask. |
| multiclass RVVBinBuiltinSet<string intrinsic_name, string type_range, |
|                             list<list<string>> suffixes_prototypes> { |
|   foreach entry = suffixes_prototypes in { |
|     let Name = NAME # "_" # entry[0], |
|         IRName = intrinsic_name, |
|         IRNameMask = intrinsic_name # "_mask" in { |
|       def : RVVBinBuiltin<entry[1], entry[2], type_range>; |
|     } |
|   } |
| } |
| |
| // Element types (TypeRange letters) iterated by RVVIndexedLoad. Note that "h" |
| // (float16_t) is not part of this list. |
| defvar TypeList = ["c","s","i","l","f","d"]; |
| // Pairs of [EEW in bits, matching (Log2EEW:Value) type transformer]. |
| // RVVIndexedLoad uses the first element in the builtin name and the second |
| // one in the prototype. |
| defvar EEWList = [["8", "(Log2EEW:3)"], |
| ["16", "(Log2EEW:4)"], |
| ["32", "(Log2EEW:5)"], |
| ["64", "(Log2EEW:6)"]]; |
| |
| // Predicate on a TypeRange letter: `val` is true when `type` is one of the |
| // floating-point letters "h", "f" or "d", and false otherwise. |
| class IsFloat<string type> { |
| bit val = !or(!eq(type, "h"), !eq(type, "f"), !eq(type, "d")); |
| } |
| |
| // Defines the load builtins named NAME followed by "_v" for every element |
| // type in `types`; they use the "vle" / "vle_mask" intrinsic IDs and are not |
| // generic (HasGeneric = false). A signed/floating-point variant is always |
| // defined; an unsigned variant is added only for non floating-point types. |
| // The manual codegen bitcasts the pointer operand to a pointer to the result |
| // type and selects the intrinsic types from the result type and the type of |
| // Ops[1] (Ops[3] in the masked form). |
| multiclass RVVVLEBuiltin<list<string> types> { |
|   let HasGeneric = false, |
|       ManualCodegen = [{ |
| IntrinsicTypes = {ResultType, Ops[1]->getType()}; |
| Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); |
| }], |
|       ManualCodegenMask= [{ |
| IntrinsicTypes = {ResultType, Ops[3]->getType()}; |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); |
| }], |
|       Name = NAME # "_v", |
|       IRName = "vle", |
|       IRNameMask = "vle_mask" in { |
|     foreach elt = types in { |
|       def : RVVBuiltin<"v", "vPCe", elt>; |
|       // Floating-point element types have no unsigned counterpart. |
|       if !not(IsFloat<elt>.val) then { |
|         def : RVVBuiltin<"Uv", "UvPCUe", elt>; |
|       } |
|     } |
|   } |
| } |
| |
| // Defines the indexed load builtins for the intrinsic `op`: one builtin named |
| // `op` # EEW # "_v" for every element type of TypeList combined with every |
| // index EEW of EEWList. The index operand is the unsigned vector computed by |
| // the (Log2EEW:Value) transformer. An unsigned data variant is added only for |
| // non floating-point element types. The manual codegen bitcasts the pointer |
| // operand to a pointer to the result type and selects the intrinsic types |
| // from the result type and the types of Ops[1] and Ops[2] (Ops[2] and Ops[4] |
| // in the masked form). |
| multiclass RVVIndexedLoad<string op> { |
|   let IRName = op, |
|       IRNameMask = op # "_mask", |
|       HasGeneric = false, |
|       ManualCodegen = [{ |
| IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()}; |
| Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); |
| }], |
|       ManualCodegenMask = [{ |
| IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()}; |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); |
| }] in { |
|     foreach elt = TypeList in { |
|       foreach entry = EEWList in { |
|         defvar width = entry[0]; |
|         defvar index_type = entry[1]; |
|         let Name = op # width # "_v" in { |
|           def : RVVBuiltin<"v", "vPCe" # index_type # "Uv", elt>; |
|           // Floating-point element types have no unsigned counterpart. |
|           if !not(IsFloat<elt>.val) then { |
|             def : RVVBuiltin<"Uv", "UvPCUe" # index_type # "Uv", elt>; |
|           } |
|         } |
|       } |
|     } |
|   } |
| } |
| |
| // Defines the store builtins named NAME followed by "_v" for every element |
| // type in `types`; they use the "vse" / "vse_mask" intrinsic IDs, are not |
| // generic and have no maskedoff operand. A signed/floating-point variant is |
| // always defined; an unsigned variant is added only for non floating-point |
| // types. The manual codegen bitcasts the pointer operand (Ops[1]) to a pointer |
| // to the type of the stored value (Ops[0]) and selects the intrinsic types |
| // from the value type and the type of Ops[2] (Ops[3] in the masked form). |
| multiclass RVVVSEBuiltin<list<string> types> { |
|   let HasGeneric = false, |
|       HasMaskedOffOperand = false, |
|       // C/C++ Operand: (ptr, value, vl). Builtin: (value, ptr, vl) |
|       PermuteOperands = [1, 0], |
|       ManualCodegen = [{ |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); |
| IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType()}; |
| }], |
|       ManualCodegenMask= [{ |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); |
| IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()}; |
| }], |
|       Name = NAME # "_v", |
|       IRName = "vse", |
|       IRNameMask = "vse_mask" in { |
|     foreach elt = types in { |
|       def : RVVBuiltin<"v", "0vPe", elt>; |
|       // Floating-point element types have no unsigned counterpart. |
|       if !not(IsFloat<elt>.val) then { |
|         def : RVVBuiltin<"Uv", "0UvPUe", elt>; |
|       } |
|     } |
|   } |
| } |
| |
| // 6. Configuration-Setting Instructions |
| // 6.1. vsetvli/vsetvl instructions |
| // The two builtins below have no VL operand and no masked form, are marked as |
| // having side effects, are not generic and are instantiated for a single |
| // Log2LMUL value (0). |
| let HasVL = false, |
| HasMask = false, |
| HasSideEffects = true, |
| HasGeneric = false, |
| Log2LMUL = [0], |
| ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type |
| { |
| // vsetvl is provided as macros because it requires constant integers for |
| // SEW and LMUL. In the macros below the second argument of |
| // __builtin_rvv_vsetvli is 0/1/2/3 for e8/e16/e32/e64 and the third one is |
| // 0/1/2/3 for m1/m2/m4/m8 and 5/6/7 for mf8/mf4/mf2. |
| let HeaderCode = |
| [{ |
| #define vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5) |
| #define vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6) |
| #define vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7) |
| #define vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0) |
| #define vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1) |
| #define vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2) |
| #define vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3) |
| |
| #define vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6) |
| #define vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7) |
| #define vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0) |
| #define vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1) |
| #define vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2) |
| #define vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3) |
| |
| #define vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7) |
| #define vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0) |
| #define vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1) |
| #define vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2) |
| #define vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3) |
| |
| #define vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0) |
| #define vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1) |
| #define vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2) |
| #define vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3) |
| |
| }] in |
| // Prototype: size_t (size_t, constant size_t, constant size_t). |
| def vsetvli : RVVBuiltin<"", "zzKzKz", "i">; |
| |
| // The vsetvlmax macros take no AVL argument; their two arguments use the |
| // same SEW and LMUL numbering as the vsetvl macros. |
| let HeaderCode = |
| [{ |
| #define vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5) |
| #define vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6) |
| #define vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7) |
| #define vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0) |
| #define vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1) |
| #define vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2) |
| #define vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3) |
| |
| #define vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6) |
| #define vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7) |
| #define vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0) |
| #define vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1) |
| #define vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2) |
| #define vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3) |
| |
| #define vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7) |
| #define vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0) |
| #define vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1) |
| #define vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2) |
| #define vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3) |
| |
| #define vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0) |
| #define vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1) |
| #define vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2) |
| #define vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3) |
| |
| }] in |
| // Prototype: size_t (constant size_t, constant size_t). |
| def vsetvlimax : RVVBuiltin<"", "zKzKz", "i">; |
| } |
| |
| // 7. Vector Loads and Stores |
| // 7.4. Vector Unit-Stride Instructions |
| // One defm per element width; the 32- and 64-bit variants also cover the |
| // float ("f") and double ("d") element types. |
| defm vle8: RVVVLEBuiltin<["c"]>; |
| defm vle16: RVVVLEBuiltin<["s"]>; |
| defm vle32: RVVVLEBuiltin<["i","f"]>; |
| defm vle64: RVVVLEBuiltin<["l","d"]>; |
| |
| defm vse8 : RVVVSEBuiltin<["c"]>; |
| defm vse16: RVVVSEBuiltin<["s"]>; |
| defm vse32: RVVVSEBuiltin<["i","f"]>; |
| defm vse64: RVVVSEBuiltin<["l","d"]>; |
| |
| // 7.6. Vector Indexed Instructions |
| // The defms are anonymous: RVVIndexedLoad builds the builtin names from its |
| // `op` argument. |
| defm : RVVIndexedLoad<"vluxei">; |
| defm : RVVIndexedLoad<"vloxei">; |
| |
| // 12. Vector Integer Arithmetic Instructions |
| // 12.1. Vector Single-Width Integer Add and Subtract |
| // Each inner list is [name suffix, Suffix, Prototype]: signed and unsigned |
| // vector-vector ("vv") and vector-scalar ("vx") forms for the integer types. |
| defm vadd : RVVBinBuiltinSet<"vadd", "csil", |
| [["vv", "v", "vvv"], |
| ["vx", "v", "vve"], |
| ["vv", "Uv", "UvUvUv"], |
| ["vx", "Uv", "UvUvUe"]]>; |
| |
| // 14. Vector Floating-Point Instructions |
| // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions |
| // Vector-vector ("vv") and vector-scalar ("vf") forms for float and double. |
| defm vfadd : RVVBinBuiltinSet<"vfadd", "fd", |
| [["vv", "v", "vvv"], |
| ["vf", "v", "vve"]]>; |