blob: 654d09c8c908275a2ee2c6cd8f45b812cc60405f [file] [log] [blame]
//==--- riscv_vector.td - RISC-V V-ext Builtin function list --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the builtins for RISC-V V-extension. See:
//
// https://github.com/riscv/rvv-intrinsic-doc
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Instruction definitions
//===----------------------------------------------------------------------===//
// Each record of the class RVVBuiltin defines a collection of builtins (i.e.
// "def vadd : RVVBuiltin" will be used to define things like "vadd_vv_i32m1",
// "vadd_vv_i32m2", etc).
//
// The elements of this collection are defined by an instantiation process the
// range of which is specified by the cross product of the LMUL attribute and
// every element in the attribute TypeRange. By default builtins have LMUL = [1,
// 2, 4, 8, 1/2, 1/4, 1/8] so the process is repeated 7 times. In tablegen we
// use the Log2LMUL [0, 1, 2, 3, -1, -2, -3] to represent the LMUL.
//
// LMUL represents the fact that the types of values used by that builtin are
// values generated by instructions that are executed under that LMUL. However,
// this does not mean the builtin is necessarily lowered into an instruction
// that executes under the specified LMUL. An example where this happens are
// loads and stores of masks. A mask like `vbool8_t` can be generated, for
// instance, by comparing two `__rvv_int8m1_t` (this is LMUL=1) or comparing two
// `__rvv_int16m2_t` (this is LMUL=2). The actual load or store, however, will
// be performed under LMUL=1 because mask registers are not grouped.
//
// TypeRange is a non-empty sequence of basic types:
//
// c: int8_t (i8)
// s: int16_t (i16)
// i: int32_t (i32)
// l: int64_t (i64)
// h: float16_t (half)
// f: float32_t (float)
// d: float64_t (double)
//
// This way, given an LMUL, a record with a TypeRange "sil" will cause the
// definition of 3 builtins. Each type "t" in the TypeRange (in this example
// they are int16_t, int32_t, int64_t) is used as a parameter that drives the
// definition of that particular builtin (for the given LMUL).
//
// During the instantiation, types can be transformed or modified using type
// transformers. Given a type "t" the following primitive type transformers can
// be applied to it to yield another type.
//
// e: type of "t" as is (identity)
// v: computes a vector type whose element type is "t" for the current LMUL
// w: computes a vector type identical to what 'v' computes except for the
// element type which is twice as wide as the element type of 'v'
// q: computes a vector type identical to what 'v' computes except for the
// element type which is four times as wide as the element type of 'v'
// o: computes a vector type identical to what 'v' computes except for the
// element type which is eight times as wide as the element type of 'v'
// m: computes a vector type identical to what 'v' computes except for the
// element type which is bool
// 0: void type, ignores "t"
// z: size_t, ignores "t"
// t: ptrdiff_t, ignores "t"
// c: uint8_t, ignores "t"
//
// So for instance if t is "i", i.e. int, then "e" will yield int again. "v"
// will yield an RVV vector type (assume LMUL=1), so __rvv_int32m1_t.
// Accordingly "w" would yield __rvv_int64m2_t.
//
// A type transformer can be prefixed by other non-primitive type transformers.
//
// P: constructs a pointer to the current type
// C: adds const to the type
// K: requires the integer type to be a constant expression
// U: given an integer type or vector type, computes its unsigned variant
// I: given a vector type, compute the vector type with integer type
// elements of the same width
// F: given a vector type, compute the vector type with floating-point type
// elements of the same width
// S: given a vector type, computes its equivalent one for LMUL=1. This is a
// no-op if the vector was already LMUL=1
// (Log2EEW:Value): Log2EEW value could be 3/4/5/6 (8/16/32/64), given a
// vector type (SEW and LMUL) and EEW (8/16/32/64), computes its
// equivalent integer vector type with EEW and corresponding ELMUL (elmul =
// (eew/sew) * lmul). For example, vector type is __rvv_float16m4
// (SEW=16, LMUL=4) and Log2EEW is 3 (EEW=8), and then equivalent vector
// type is __rvv_uint8m2_t (elmul=(8/16)*4 = 2). Ignore to define a new
// builtins if its equivalent type has illegal lmul.
//
// Following with the example above, if t is "i", then "Ue" will yield unsigned
// int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would
// yield __rvv_float64m2_t, etc.
//
// Each builtin is then defined by applying each type in TypeRange against the
// sequence of type transformers described in Suffix and Prototype.
//
// The name of the builtin is defined by the Name attribute (which defaults to
// the name of the class) appended (separated with an underscore) the Suffix
// attribute. For instance with Name="foo", Suffix = "v" and TypeRange = "il",
// the builtin generated will be __builtin_rvv_foo_i32m1 and
// __builtin_rvv_foo_i64m1 (under LMUL=1). If Suffix contains more than one
// type transformer (say "vv") each of the types is separated with an
// underscore as in "__builtin_rvv_foo_i32m1_i32m1".
//
// The C/C++ prototype of the builtin is defined by the Prototype attribute.
// Prototype is a non-empty sequence of type transformers, the first of which
// is the return type of the builtin and the rest are the parameters of the
// builtin, in order. For instance if Prototype is "wvv" and TypeRange is "si"
// a first builtin will have type
// __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and the second builtin
// will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t) (again
// under LMUL=1).
//
// There are a number of attributes that are used to constraint the number and
// shape of the builtins generated. Refer to the comments below for them.
class RVVBuiltin<string suffix, string prototype, string type_range,
string managed_suffix = ""> {
// Base name that will be prepended in __builtin_rvv_ and appended the
// computed Suffix.
string Name = NAME;
// If not empty, each instantiated builtin will have this appended after an
// underscore (_). It is instantiated like Prototype.
string Suffix = suffix;
// If empty, default MangledName is sub string of `Name` which end of first
// '_'. For example, the default mangled name is `vadd` for Name `vadd_vv`.
// It's used for describe some special naming cases.
string MangledName = "";
// The different variants of the builtin, parameterised with a type.
string TypeRange = type_range;
// We use each type described in TypeRange and LMUL with prototype to
// instantiate a specific element of the set of builtins being defined.
// Prototype attribute defines the C/C++ prototype of the builtin. It is a
// non-empty sequence of type transformers, the first of which is the return
// type of the builtin and the rest are the parameters of the builtin, in
// order. For instance if Prototype is "wvv", TypeRange is "si" and LMUL=1, a
// first builtin will have type
// __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t), and the second builtin
// will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t).
string Prototype = prototype;
// This builtin has a masked form.
bit HasMask = true;
// If HasMask, this flag states that this builtin has a maskedoff operand. It
// is always the first operand in builtin and IR intrinsic.
bit HasMaskedOffOperand = true;
// This builtin has a granted vector length parameter in the last position.
bit HasVL = true;
// This builtin supports function overloading and has a mangled name.
bit HasGeneric = true;
// Reads or writes "memory" or has other side-effects.
bit HasSideEffects = false;
// This builtin is valid for the given Log2LMULs.
list<int> Log2LMUL = [0, 1, 2, 3, -1, -2, -3];
// Manual code in clang codegen riscv_vector_builtin_cg.inc
code ManualCodegen = [{}];
code ManualCodegenMask = [{}];
// When emit the automatic clang codegen, it describes what types we have to use
// to obtain the specific LLVM intrinsic. -1 means the return type, otherwise,
// k >= 0 meaning the k-th operand (counting from zero) of the codegen'd
// parameter of the unmasked version. k can't be the mask operand's position.
list<int> IntrinsicTypes = [];
// When the order of the parameters of clang builtin do not match the order of
// C/C++ api, we use permutation index to mapping the operand from clang
// builtin to C/C++. It is parameter of the unmasked version without VL
// operand. If empty, the default permutation is [0, 1, 2, ...].
list<int> PermuteOperands = [];
// If these names are not empty, this is the ID of the LLVM intrinsic
// we want to lower to.
string IRName = NAME;
// If HasMask, this is the ID of the LLVM intrinsic we want to lower to.
string IRNameMask = NAME #"_mask";
// If non empty, this is the code emitted in the header, otherwise
// an automatic definition in header is emitted.
string HeaderCode = "";
}
//===----------------------------------------------------------------------===//
// Basic classes with automatic codegen.
//===----------------------------------------------------------------------===//
class RVVBinBuiltin<string suffix, string prototype, string type_range>
: RVVBuiltin<suffix, prototype, type_range> {
let IntrinsicTypes = [-1, 1];
}
multiclass RVVBinBuiltinSet<string intrinsic_name, string type_range,
list<list<string>> suffixes_prototypes> {
let IRName = intrinsic_name, IRNameMask = intrinsic_name # "_mask" in {
foreach s_p = suffixes_prototypes in {
let Name = NAME # "_" # s_p[0] in {
defvar suffix = s_p[1];
defvar prototype = s_p[2];
def : RVVBinBuiltin<suffix, prototype, type_range>;
}
}
}
}
defvar TypeList = ["c","s","i","l","f","d"];
defvar EEWList = [["8", "(Log2EEW:3)"],
["16", "(Log2EEW:4)"],
["32", "(Log2EEW:5)"],
["64", "(Log2EEW:6)"]];
class IsFloat<string type> {
bit val = !or(!eq(type, "h"), !eq(type, "f"), !eq(type, "d"));
}
multiclass RVVVLEBuiltin<list<string> types> {
let Name = NAME # "_v",
IRName = "vle",
IRNameMask ="vle_mask",
HasGeneric = false,
ManualCodegen = [{
IntrinsicTypes = {ResultType, Ops[1]->getType()};
Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
}],
ManualCodegenMask= [{
IntrinsicTypes = {ResultType, Ops[3]->getType()};
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
}] in {
foreach type = types in {
def : RVVBuiltin<"v", "vPCe", type>;
if !not(IsFloat<type>.val) then {
def : RVVBuiltin<"Uv", "UvPCUe", type>;
}
}
}
}
multiclass RVVIndexedLoad<string op> {
let HasGeneric = false,
ManualCodegen = [{
IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()};
Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
}],
ManualCodegenMask = [{
IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()};
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
}] in {
foreach type = TypeList in {
foreach eew_list = EEWList in {
defvar eew = eew_list[0];
defvar eew_type = eew_list[1];
let Name = op # eew # "_v", IRName = op, IRNameMask = op # "_mask" in {
def: RVVBuiltin<"v", "vPCe" # eew_type # "Uv", type>;
if !not(IsFloat<type>.val) then {
def: RVVBuiltin<"Uv", "UvPCUe" # eew_type # "Uv", type>;
}
}
}
}
}
}
multiclass RVVVSEBuiltin<list<string> types> {
let Name = NAME # "_v",
IRName = "vse",
IRNameMask = "vse_mask",
HasMaskedOffOperand = false,
PermuteOperands = [1, 0], // C/C++ Operand: (ptr, value, vl). Builtin: (value, ptr, vl)
HasGeneric = false,
ManualCodegen = [{
Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo());
IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType()};
}],
ManualCodegenMask= [{
Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo());
IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()};
}] in {
foreach type = types in {
def : RVVBuiltin<"v", "0vPe", type>;
if !not(IsFloat<type>.val) then {
def : RVVBuiltin<"Uv", "0UvPUe", type>;
}
}
}
}
// 6. Configuration-Setting Instructions
// 6.1. vsetvli/vsetvl instructions
let HasVL = false,
HasMask = false,
HasSideEffects = true,
HasGeneric = false,
Log2LMUL = [0],
ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type
{
// vsetvl is a macro because for it require constant integers in SEW and LMUL.
let HeaderCode =
[{
#define vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5)
#define vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6)
#define vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7)
#define vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0)
#define vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1)
#define vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2)
#define vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3)
#define vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6)
#define vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7)
#define vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0)
#define vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1)
#define vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2)
#define vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3)
#define vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7)
#define vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0)
#define vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1)
#define vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2)
#define vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3)
#define vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0)
#define vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1)
#define vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2)
#define vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3)
}] in
def vsetvli : RVVBuiltin<"", "zzKzKz", "i">;
let HeaderCode =
[{
#define vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5)
#define vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6)
#define vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7)
#define vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0)
#define vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1)
#define vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2)
#define vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3)
#define vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6)
#define vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7)
#define vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0)
#define vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1)
#define vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2)
#define vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3)
#define vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7)
#define vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0)
#define vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1)
#define vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2)
#define vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3)
#define vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0)
#define vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1)
#define vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2)
#define vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3)
}] in
def vsetvlimax : RVVBuiltin<"", "zKzKz", "i">;
}
// 7. Vector Loads and Stores
// 7.4. Vector Unit-Stride Instructions
defm vle8: RVVVLEBuiltin<["c"]>;
defm vle16: RVVVLEBuiltin<["s"]>;
defm vle32: RVVVLEBuiltin<["i","f"]>;
defm vle64: RVVVLEBuiltin<["l","d"]>;
defm vse8 : RVVVSEBuiltin<["c"]>;
defm vse16: RVVVSEBuiltin<["s"]>;
defm vse32: RVVVSEBuiltin<["i","f"]>;
defm vse64: RVVVSEBuiltin<["l","d"]>;
// 7.6. Vector Indexed Instructions
defm : RVVIndexedLoad<"vluxei">;
defm : RVVIndexedLoad<"vloxei">;
// 12. Vector Integer Arithmetic Instructions
// 12.1. Vector Single-Width Integer Add and Subtract
defm vadd : RVVBinBuiltinSet<"vadd", "csil",
[["vv", "v", "vvv"],
["vx", "v", "vve"],
["vv", "Uv", "UvUvUv"],
["vx", "Uv", "UvUvUe"]]>;
// 14. Vector Floating-Point Instructions
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
defm vfadd : RVVBinBuiltinSet<"vfadd", "fd",
[["vv", "v", "vvv"],
["vf", "v", "vve"]]>;