| //==--- riscv_vector.td - RISC-V V-ext Builtin function list --------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the builtins for RISC-V V-extension. See: |
| // |
| // https://github.com/riscv/rvv-intrinsic-doc |
| // |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| // Instruction definitions |
| //===----------------------------------------------------------------------===// |
| // Each record of the class RVVBuiltin defines a collection of builtins (i.e. |
| // "def vadd : RVVBuiltin" will be used to define things like "vadd_vv_i32m1", |
| // "vadd_vv_i32m2", etc). |
| // |
| // The elements of this collection are defined by an instantiation process the |
| // range of which is specified by the cross product of the LMUL attribute and |
| // every element in the attribute TypeRange. By default builtins have LMUL = [1, |
| // 2, 4, 8, 1/2, 1/4, 1/8] so the process is repeated 7 times. In tablegen we |
| // use the Log2LMUL [0, 1, 2, 3, -1, -2, -3] to represent the LMUL. |
| // |
| // LMUL represents the fact that the types of values used by that builtin are |
| // values generated by instructions that are executed under that LMUL. However, |
| // this does not mean the builtin is necessarily lowered into an instruction |
| // that executes under the specified LMUL. Examples where this happens are |
| // loads and stores of masks. A mask like `vbool8_t` can be generated, for |
| // instance, by comparing two `__rvv_int8m1_t` (this is LMUL=1) or comparing two |
| // `__rvv_int16m2_t` (this is LMUL=2). The actual load or store, however, will |
| // be performed under LMUL=1 because mask registers are not grouped. |
| // |
| // TypeRange is a non-empty sequence of basic types: |
| // |
| // c: int8_t (i8) |
| // s: int16_t (i16) |
| // i: int32_t (i32) |
| // l: int64_t (i64) |
| // h: float16_t (half) |
| // f: float32_t (float) |
| // d: float64_t (double) |
| // |
| // This way, given an LMUL, a record with a TypeRange "sil" will cause the |
| // definition of 3 builtins. Each type "t" in the TypeRange (in this example |
| // they are int16_t, int32_t, int64_t) is used as a parameter that drives the |
| // definition of that particular builtin (for the given LMUL). |
| // |
| // During the instantiation, types can be transformed or modified using type |
| // transformers. Given a type "t" the following primitive type transformers can |
| // be applied to it to yield another type. |
| // |
| // e: type of "t" as is (identity) |
| // v: computes a vector type whose element type is "t" for the current LMUL |
| // w: computes a vector type identical to what 'v' computes except for the |
| // element type which is twice as wide as the element type of 'v' |
| // q: computes a vector type identical to what 'v' computes except for the |
| // element type which is four times as wide as the element type of 'v' |
| // o: computes a vector type identical to what 'v' computes except for the |
| // element type which is eight times as wide as the element type of 'v' |
| // m: computes a vector type identical to what 'v' computes except for the |
| // element type which is bool |
| // 0: void type, ignores "t" |
| // z: size_t, ignores "t" |
| // t: ptrdiff_t, ignores "t" |
| // c: uint8_t, ignores "t" |
| // |
| // So for instance if t is "i", i.e. int, then "e" will yield int again. "v" |
| // will yield an RVV vector type (assume LMUL=1), so __rvv_int32m1_t. |
| // Accordingly "w" would yield __rvv_int64m2_t. |
| // |
| // A type transformer can be prefixed by other non-primitive type transformers. |
| // |
| // P: constructs a pointer to the current type |
| // C: adds const to the type |
| // K: requires the integer type to be a constant expression |
| // U: given an integer type or vector type, computes its unsigned variant |
| // I: given a vector type, compute the vector type with integer type |
| // elements of the same width |
| // F: given a vector type, compute the vector type with floating-point type |
| // elements of the same width |
| // S: given a vector type, computes its equivalent one for LMUL=1. This is a |
| // no-op if the vector was already LMUL=1 |
| // (Log2EEW:Value): Value is the Log2EEW and can be 3/4/5/6 (EEW = |
| // 8/16/32/64). Given a vector type (SEW and LMUL) and an EEW, computes |
| // the equivalent integer vector type with that EEW and the corresponding |
| // ELMUL (elmul = (eew/sew) * lmul). For example, if the vector type is |
| // __rvv_float16m4_t (SEW=16, LMUL=4) and Log2EEW is 3 (EEW=8), the |
| // equivalent vector type is __rvv_uint8m2_t (elmul = (8/16)*4 = 2). No |
| // builtin is defined if the equivalent type has an illegal LMUL. |
| // |
| // Following with the example above, if t is "i", then "Ue" will yield unsigned |
| // int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would |
| // yield __rvv_float64m2_t, etc. |
| // |
| // Each builtin is then defined by applying each type in TypeRange against the |
| // sequence of type transformers described in Suffix and Prototype. |
| // |
| // The name of the builtin is the Name attribute (which defaults to NAME, the |
| // name of the record being defined) followed by an underscore and the Suffix |
| // attribute. For instance with Name="foo", Suffix = "v" and TypeRange = "il", |
| // the builtin generated will be __builtin_rvv_foo_i32m1 and |
| // __builtin_rvv_foo_i64m1 (under LMUL=1). If Suffix contains more than one |
| // type transformer (say "vv") each of the types is separated with an |
| // underscore as in "__builtin_rvv_foo_i32m1_i32m1". |
| // |
| // The C/C++ prototype of the builtin is defined by the Prototype attribute. |
| // Prototype is a non-empty sequence of type transformers, the first of which |
| // is the return type of the builtin and the rest are the parameters of the |
| // builtin, in order. For instance if Prototype is "wvv" and TypeRange is "si" |
| // a first builtin will have type |
| // __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and the second builtin |
| // will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t) (again |
| // under LMUL=1). |
| // |
| // There are a number of attributes that are used to constrain the number and |
| // shape of the builtins generated. Refer to the comments below for them. |
| // Base class of every RVV builtin record. The first three template arguments |
| // initialize Suffix, Prototype and TypeRange. |
| // NOTE(review): the fourth template argument, managed_suffix, is not referenced |
| // anywhere in the class body; presumably it was meant to be spelled |
| // "mangled_suffix" -- confirm before relying on it. |
| class RVVBuiltin<string suffix, string prototype, string type_range, |
| string managed_suffix = ""> { |
| // Base name of the builtin: it is prefixed with __builtin_rvv_ and followed by |
| // the computed Suffix. Defaults to the name of the record being defined. |
| string Name = NAME; |
| |
| // If not empty, each instantiated builtin will have this appended after an |
| // underscore (_). It is instantiated like Prototype. |
| string Suffix = suffix; |
| |
| // If empty, the default MangledName is the substring of `Name` that ends |
| // before the first '_'. For example, the default mangled name is `vadd` for |
| // Name `vadd_vv`. Set it explicitly to describe special naming cases. |
| string MangledName = ""; |
| |
| // The different variants of the builtin, parameterised with a type. |
| string TypeRange = type_range; |
| |
| // We use each type described in TypeRange and LMUL with prototype to |
| // instantiate a specific element of the set of builtins being defined. |
| // Prototype attribute defines the C/C++ prototype of the builtin. It is a |
| // non-empty sequence of type transformers, the first of which is the return |
| // type of the builtin and the rest are the parameters of the builtin, in |
| // order. For instance if Prototype is "wvv", TypeRange is "si" and LMUL=1, a |
| // first builtin will have type |
| // __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t), and the second builtin |
| // will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t). |
| string Prototype = prototype; |
| |
| // This builtin has a masked form. |
| bit HasMask = true; |
| |
| // If HasMask, this flag states that this builtin has a maskedoff operand. It |
| // is always the first operand in builtin and IR intrinsic. |
| bit HasMaskedOffOperand = true; |
| |
| // This builtin has a granted vector length parameter in the last position. |
| bit HasVL = true; |
| |
| // This builtin supports non-masked function overloading api. |
| // All masked operations support overloading api. |
| bit HasNoMaskedOverloaded = true; |
| |
| // Reads or writes "memory" or has other side-effects. |
| bit HasSideEffects = false; |
| |
| // This builtin is valid for the given Log2LMULs. |
| list<int> Log2LMUL = [0, 1, 2, 3, -1, -2, -3]; |
| |
| // Manual code in clang codegen riscv_vector_builtin_cg.inc |
| code ManualCodegen = [{}]; |
| code ManualCodegenMask = [{}]; |
| |
| // When emitting the automatic clang codegen, this describes which types are |
| // used to obtain the specific LLVM intrinsic. -1 means the return type; |
| // otherwise k >= 0 means the k-th operand (counting from zero) of the |
| // codegen'd parameters of the unmasked version. k can't be the mask operand's |
| // position. |
| list<int> IntrinsicTypes = []; |
| |
| // When the order of the parameters of the clang builtin does not match the |
| // order of the C/C++ api, this permutation index maps the operands from the |
| // clang builtin to the C/C++ api. It describes the parameters of the unmasked |
| // version without the VL operand. If empty, the default permutation is |
| // [0, 1, 2, ...]. |
| list<int> PermuteOperands = []; |
| |
| // If not empty, this is the ID of the LLVM intrinsic we want to lower to. |
| string IRName = NAME; |
| |
| // If HasMask, this is the ID of the LLVM intrinsic we want to lower to. |
| string IRNameMask = NAME #"_mask"; |
| |
| // If non empty, this is the code emitted in the header, otherwise |
| // an automatic definition in header is emitted. |
| string HeaderCode = ""; |
| |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Basic classes with automatic codegen. |
| //===----------------------------------------------------------------------===// |
| |
| // The classes below only preset IntrinsicTypes, i.e. which builtin types are |
| // used to obtain the LLVM intrinsic (-1 is the return type, k >= 0 is the k-th |
| // operand of the unmasked builtin). |
| |
| // IntrinsicTypes is output [-1] |
| class RVVOutBuiltin<string suffix, string prototype, string type_range> |
| : RVVBuiltin<suffix, prototype, type_range> { |
| let IntrinsicTypes = [-1]; |
| } |
| |
| // IntrinsicTypes is op0 [0] |
| class RVVOp0Builtin<string suffix, string prototype, string type_range> |
| : RVVBuiltin<suffix, prototype, type_range> { |
| let IntrinsicTypes = [0]; |
| } |
| |
| // IntrinsicTypes is output, op1 [-1, 1] |
| class RVVOutOp1Builtin<string suffix, string prototype, string type_range> |
| : RVVBuiltin<suffix, prototype, type_range> { |
| let IntrinsicTypes = [-1, 1]; |
| } |
| |
| // IntrinsicTypes is output, op0, op1 [-1, 0, 1] |
| class RVVOutOp0Op1Builtin<string suffix, string prototype, string type_range> |
| : RVVBuiltin<suffix, prototype, type_range> { |
| let IntrinsicTypes = [-1, 0, 1]; |
| } |
| |
| // Defines one anonymous RVVBuiltin per entry of suffixes_prototypes. Every |
| // entry is a [name suffix, Suffix, Prototype] triple: entry[0] is appended to |
| // NAME (after an underscore) to form Name, entry[1] and entry[2] become the |
| // Suffix and Prototype of the builtin. All entries lower to the same LLVM |
| // intrinsic (intrinsic_name, masked form intrinsic_name # "_mask") and share |
| // intrinsic_types as IntrinsicTypes. |
| multiclass RVVBuiltinSet<string intrinsic_name, string type_range, |
|                          list<list<string>> suffixes_prototypes, |
|                          list<int> intrinsic_types> { |
|   foreach entry = suffixes_prototypes in { |
|     let Name = !strconcat(NAME, "_", entry[0]), |
|         IRName = intrinsic_name, |
|         IRNameMask = !strconcat(intrinsic_name, "_mask"), |
|         IntrinsicTypes = intrinsic_types in |
|     def : RVVBuiltin<entry[1], entry[2], type_range>; |
|   } |
| } |
| |
| // Thin wrappers around RVVBuiltinSet that only fix the IntrinsicTypes list. |
| |
| // IntrinsicTypes is output, op0, op1 [-1, 0, 1] |
| multiclass RVVOutOp0Op1BuiltinSet<string intrinsic_name, string type_range, |
| list<list<string>> suffixes_prototypes> |
| : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, |
| [-1, 0, 1]>; |
| |
| // IntrinsicTypes is output, op1 [-1, 1] |
| multiclass RVVOutOp1BuiltinSet<string intrinsic_name, string type_range, |
| list<list<string>> suffixes_prototypes> |
| : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [-1, 1]>; |
| |
| // IntrinsicTypes is op0, op1 [0, 1] |
| multiclass RVVOp0Op1BuiltinSet<string intrinsic_name, string type_range, |
| list<list<string>> suffixes_prototypes> |
| : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [0, 1]>; |
| |
| // IntrinsicTypes is output, op1, op2 [-1, 1, 2] |
| multiclass RVVOutOp1Op2BuiltinSet<string intrinsic_name, string type_range, |
| list<list<string>> suffixes_prototypes> |
| : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [-1, 1, 2]>; |
| |
| // Signed integer binary operations: a _vv form (vector, vector) and a _vx form |
| // (vector, scalar) over all integer element types. The LLVM intrinsic is named |
| // after the defm that instantiates the set. |
| multiclass RVVSignedBinBuiltinSet |
| : RVVOutOp1BuiltinSet<NAME, "csil", |
| [["vv", "v", "vvv"], |
| ["vx", "v", "vve"]]>; |
| |
| // Unsigned counterpart of RVVSignedBinBuiltinSet: every type in the prototype |
| // carries the "U" (unsigned) prefix. |
| multiclass RVVUnsignedBinBuiltinSet |
| : RVVOutOp1BuiltinSet<NAME, "csil", |
| [["vv", "Uv", "UvUvUv"], |
| ["vx", "Uv", "UvUvUe"]]>; |
| |
| // Both the signed and the unsigned binary builtins. |
| multiclass RVVIntBinBuiltinSet |
| : RVVSignedBinBuiltinSet, |
| RVVUnsignedBinBuiltinSet; |
| |
| // Shifts of signed vectors: the shift amount is an unsigned vector ("Uv") in |
| // the _vv form and a size_t ("z") in the _vx form. |
| multiclass RVVSignedShiftBuiltinSet |
| : RVVOutOp1BuiltinSet<NAME, "csil", |
| [["vv", "v", "vvUv"], |
| ["vx", "v", "vvz"]]>; |
| |
| // Shifts of unsigned vectors: same shift-amount types as the signed set. |
| multiclass RVVUnsignedShiftBuiltinSet |
| : RVVOutOp1BuiltinSet<NAME, "csil", |
| [["vv", "Uv", "UvUvUv"], |
| ["vx", "Uv", "UvUvz"]]>; |
| |
| // Both the signed and the unsigned shift builtins. |
| multiclass RVVShiftBuiltinSet |
| : RVVSignedShiftBuiltinSet, |
| RVVUnsignedShiftBuiltinSet; |
| |
| // Narrowing shifts: the first source operand is the widened ("w") vector, the |
| // result is the narrow ("v") vector, and the shift amount is an unsigned |
| // vector ("Uv", _wv form) or a size_t ("z", _wx form). |
| // |
| // The sets are instantiated through `defm ""` inside the multiclass body, the |
| // same form RVVIntTerBuiltinSet uses under its own enclosing `let`, instead of |
| // through a parent-multiclass list, so that the Log2LMUL restriction below is |
| // applied when the records are instantiated. |
| // NOTE(review): with the parent-list form the enclosing `let` presumably never |
| // reached the inherited records -- confirm against the TableGen `let` |
| // semantics. The set of generated builtins is expected to be unchanged either |
| // way, assuming the emitter already drops types with an illegal LMUL. |
| let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { |
|   multiclass RVVSignedNShiftBuiltinSet { |
|     defm "" : RVVOutOp0Op1BuiltinSet<NAME, "csil", |
|                                      [["wv", "v", "vwUv"], |
|                                       ["wx", "v", "vwz"]]>; |
|   } |
|   multiclass RVVUnsignedNShiftBuiltinSet { |
|     defm "" : RVVOutOp0Op1BuiltinSet<NAME, "csil", |
|                                      [["wv", "Uv", "UvUwUv"], |
|                                       ["wx", "Uv", "UvUwz"]]>; |
|   } |
| } |
| |
| // Signed and unsigned _vvm/_vxm forms that return a vector and take a trailing |
| // mask-typed ("m") operand. |
| multiclass RVVCarryinBuiltinSet |
| : RVVOutOp1BuiltinSet<NAME, "csil", |
| [["vvm", "v", "vvvm"], |
| ["vxm", "v", "vvem"], |
| ["vvm", "Uv", "UvUvUvm"], |
| ["vxm", "Uv", "UvUvUem"]]>; |
| |
| // Signed and unsigned _vvm/_vxm forms that return a mask and take a trailing |
| // mask-typed operand. The LLVM intrinsic name is supplied by the user of the |
| // multiclass instead of being derived from NAME. |
| multiclass RVVCarryOutInBuiltinSet<string intrinsic_name> |
| : RVVOp0Op1BuiltinSet<intrinsic_name, "csil", |
| [["vvm", "vm", "mvvm"], |
| ["vxm", "vm", "mvem"], |
| ["vvm", "Uvm", "mUvUvm"], |
| ["vxm", "Uvm", "mUvUem"]]>; |
| |
| // Signed _vv/_vx forms that return a mask. |
| multiclass RVVSignedMaskOutBuiltinSet |
| : RVVOp0Op1BuiltinSet<NAME, "csil", |
| [["vv", "vm", "mvv"], |
| ["vx", "vm", "mve"]]>; |
| |
| // Unsigned _vv/_vx forms that return a mask. |
| multiclass RVVUnsignedMaskOutBuiltinSet |
| : RVVOp0Op1BuiltinSet<NAME, "csil", |
| [["vv", "Uvm", "mUvUv"], |
| ["vx", "Uvm", "mUvUe"]]>; |
| |
| // Both the signed and the unsigned mask-returning builtins. |
| multiclass RVVIntMaskOutBuiltinSet |
| : RVVSignedMaskOutBuiltinSet, |
| RVVUnsignedMaskOutBuiltinSet; |
| |
| // Builtin for an integer extension. The record name is kept in full as the |
| // MangledName, the LLVM intrinsic is intrinsic_name (masked form |
| // intrinsic_name # "_mask"), and the intrinsic is obtained from the return |
| // type and the type of operand 0. |
| class RVVIntExt<string intrinsic_name, string suffix, string prototype, |
|                 string type_range> |
|     : RVVBuiltin<suffix, prototype, type_range> { |
|   let MangledName = NAME; |
|   let IntrinsicTypes = [-1, 0]; |
|   let IRName = intrinsic_name; |
|   let IRNameMask = !strconcat(intrinsic_name, "_mask"); |
| } |
| |
| // Integer ternary builtins (signed and unsigned, _vv and _vx forms). They are |
| // defined without a maskedoff operand. |
| let HasMaskedOffOperand = false in |
| multiclass RVVIntTerBuiltinSet { |
|   defm "" : RVVOutOp1BuiltinSet<NAME, "csil", |
|                                 [["vv", "v", "vvvv"], |
|                                  ["vx", "v", "vvev"], |
|                                  ["vv", "Uv", "UvUvUvUv"], |
|                                  ["vx", "Uv", "UvUvUeUv"]]>; |
| } |
| |
| // Floating-point ternary builtins (_vv and _vf forms), also defined without a |
| // maskedoff operand. |
| let HasMaskedOffOperand = false in |
| multiclass RVVFloatingTerBuiltinSet { |
|   defm "" : RVVOutOp1BuiltinSet<NAME, "fd", |
|                                 [["vv", "v", "vvvv"], |
|                                  ["vf", "v", "vvev"]]>; |
| } |
| |
| // Widening floating-point ternary builtins for float sources: the result and |
| // the first operand are the widened ("w") type. No maskedoff operand is used |
| // and only Log2LMUL -1..2 is instantiated. |
| let Log2LMUL = [-1, 0, 1, 2], |
|     HasMaskedOffOperand = false in |
| multiclass RVVFloatingWidenTerBuiltinSet { |
|   defm "" : RVVOutOp1Op2BuiltinSet<NAME, "f", |
|                                    [["vv", "w", "wwvv"], |
|                                     ["vf", "w", "wwev"]]>; |
| } |
| |
| // Floating-point binary operations over float and double: a _vv form (vector, |
| // vector) and a _vf form (vector, scalar). |
| multiclass RVVFloatingBinBuiltinSet |
| : RVVOutOp1BuiltinSet<NAME, "fd", |
| [["vv", "v", "vvv"], |
| ["vf", "v", "vve"]]>; |
| |
| // Floating-point binary operations that only have the _vf form. |
| multiclass RVVFloatingBinVFBuiltinSet |
| : RVVOutOp1BuiltinSet<NAME, "fd", |
| [["vf", "v", "vve"]]>; |
| |
| // Floating-point operations returning a mask: _vv and _vf forms. |
| multiclass RVVFloatingMaskOutBuiltinSet |
| : RVVOp0Op1BuiltinSet<NAME, "fd", |
| [["vv", "vm", "mvv"], |
| ["vf", "vm", "mve"]]>; |
| |
| // Floating-point operations returning a mask that only have the _vf form. |
| multiclass RVVFloatingMaskOutVFBuiltinSet |
| : RVVOp0Op1BuiltinSet<NAME, "fd", |
| [["vf", "vm", "mve"]]>; |
| |
| // Floating-point unary builtin over float and double whose LLVM intrinsic is |
| // obtained from the return type only. builtin_suffix is appended to the record |
| // name (after an underscore) to form Name; ir_suffix is forwarded as the |
| // Suffix template argument of the base class. |
| class RVVFloatingUnaryBuiltin<string builtin_suffix, string ir_suffix, |
|                               string prototype> |
|     : RVVOutBuiltin<ir_suffix, prototype, "fd"> { |
|   let Name = !strconcat(NAME, "_", builtin_suffix); |
| } |
| |
| // Vector-in, vector-out floating-point unary builtin with the "_v" name suffix. |
| class RVVFloatingUnaryVVBuiltin |
|     : RVVFloatingUnaryBuiltin<"v", "v", "vv">; |
| |
| // Widening operations, which use a different mangled name: Name and |
| // MangledName are both NAME # "_" # <name suffix>. Every |
| // [name suffix, Suffix, Prototype] entry defines one builtin whose LLVM |
| // intrinsic is obtained from the return type, operand 0 and operand 1. |
| multiclass RVVWidenBuiltinSet<string intrinsic_name, string type_range, |
|                               list<list<string>> suffixes_prototypes> { |
|   foreach entry = suffixes_prototypes in { |
|     let Log2LMUL = [-3, -2, -1, 0, 1, 2], |
|         IRName = intrinsic_name, |
|         IRNameMask = !strconcat(intrinsic_name, "_mask"), |
|         Name = !strconcat(NAME, "_", entry[0]), |
|         MangledName = !strconcat(NAME, "_", entry[0]) in |
|     def : RVVOutOp0Op1Builtin<entry[1], entry[2], type_range>; |
|   } |
| } |
| |
| // Widening operations whose first operand is already widened, which also use |
| // a different mangled name: Name and MangledName are both |
| // NAME # "_" # <name suffix>. Every [name suffix, Suffix, Prototype] entry |
| // defines one builtin whose LLVM intrinsic is obtained from the return type |
| // and operand 1. |
| multiclass RVVWidenWOp0BuiltinSet<string intrinsic_name, string type_range, |
|                                   list<list<string>> suffixes_prototypes> { |
|   foreach entry = suffixes_prototypes in { |
|     let Log2LMUL = [-3, -2, -1, 0, 1, 2], |
|         IRName = intrinsic_name, |
|         IRNameMask = !strconcat(intrinsic_name, "_mask"), |
|         Name = !strconcat(NAME, "_", entry[0]), |
|         MangledName = !strconcat(NAME, "_", entry[0]) in |
|     def : RVVOutOp1Builtin<entry[1], entry[2], type_range>; |
|   } |
| } |
| |
| // Signed widening binary operations: narrow sources, widened ("w") result. |
| multiclass RVVSignedWidenBinBuiltinSet |
| : RVVWidenBuiltinSet<NAME, "csi", |
| [["vv", "w", "wvv"], |
| ["vx", "w", "wve"]]>; |
| |
| // Signed widening binary operations whose first source is already widened |
| // (_wv/_wx forms). They lower to the NAME # "_w" intrinsic. |
| multiclass RVVSignedWidenOp0BinBuiltinSet |
| : RVVWidenWOp0BuiltinSet<NAME # "_w", "csi", |
| [["wv", "w", "wwv"], |
| ["wx", "w", "wwe"]]>; |
| |
| // Unsigned widening binary operations: narrow sources, widened result. |
| multiclass RVVUnsignedWidenBinBuiltinSet |
| : RVVWidenBuiltinSet<NAME, "csi", |
| [["vv", "Uw", "UwUvUv"], |
| ["vx", "Uw", "UwUvUe"]]>; |
| |
| // Unsigned widening binary operations whose first source is already widened |
| // (_wv/_wx forms). They lower to the NAME # "_w" intrinsic. |
| multiclass RVVUnsignedWidenOp0BinBuiltinSet |
| : RVVWidenWOp0BuiltinSet<NAME # "_w", "csi", |
| [["wv", "Uw", "UwUwUv"], |
| ["wx", "Uw", "UwUwUe"]]>; |
| |
| // Floating-point widening binary operations for float sources (_vv/_vf forms). |
| multiclass RVVFloatingWidenBinBuiltinSet |
| : RVVWidenBuiltinSet<NAME, "f", |
| [["vv", "w", "wvv"], |
| ["vf", "w", "wve"]]>; |
| |
| // Floating-point widening binary operations whose first source is already |
| // widened (_wv/_wf forms). They lower to the NAME # "_w" intrinsic. |
| multiclass RVVFloatingWidenOp0BinBuiltinSet |
| : RVVWidenWOp0BuiltinSet<NAME # "_w", "f", |
| [["wv", "w", "wwv"], |
| ["wf", "w", "wwe"]]>; |
| |
| // Basic type codes iterated by RVVIndexedLoad. |
| defvar TypeList = ["c","s","i","l","f","d"]; |
| // Pairs of [EEW in bits, matching (Log2EEW:N) type transformer]. |
| defvar EEWList = [["8", "(Log2EEW:3)"], |
| ["16", "(Log2EEW:4)"], |
| ["32", "(Log2EEW:5)"], |
| ["64", "(Log2EEW:6)"]]; |
| |
| // val is true when `type` is one of the floating-point type codes ("h", "f" |
| // or "d") and false for any other string. |
| class IsFloat<string type> { |
|   bit val = !not(!empty(!filter(fp, ["h", "f", "d"], !eq(fp, type)))); |
| } |
| |
| // Unit-stride load builtins lowered to the vle / vle_mask intrinsics. For each |
| // type in `types` one builtin is defined, plus an unsigned variant when the |
| // type is not a floating-point one. The non-masked overloaded api is disabled. |
| // The manual codegen obtains the intrinsic from the result type and the type |
| // of Ops[1] (unmasked) or Ops[3] (masked) -- presumably the VL operand, which |
| // HasVL places last -- and bitcasts the pointer operand (Ops[0] unmasked, |
| // Ops[1] masked) to a pointer to the result type. |
| multiclass RVVVLEBuiltin<list<string> types> { |
| let Name = NAME # "_v", |
| IRName = "vle", |
| IRNameMask ="vle_mask", |
| HasNoMaskedOverloaded = false, |
| ManualCodegen = [{ |
| IntrinsicTypes = {ResultType, Ops[1]->getType()}; |
| Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); |
| }], |
| ManualCodegenMask= [{ |
| IntrinsicTypes = {ResultType, Ops[3]->getType()}; |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); |
| }] in { |
| foreach type = types in { |
| def : RVVBuiltin<"v", "vPCe", type>; |
| // Skip floating types for unsigned versions. |
| if !not(IsFloat<type>.val) then { |
| def : RVVBuiltin<"Uv", "UvPCUe", type>; |
| } |
| } |
| } |
| } |
| |
| // Indexed load builtins for the intrinsic `op` (masked form op # "_mask"). |
| // One builtin named op # <EEW> # "_v" is defined for every combination of a |
| // type in TypeList and an index EEW in EEWList, plus an unsigned variant when |
| // the type is not a floating-point one. The index operand is the unsigned |
| // vector produced by the (Log2EEW:N) transformer. |
| // The manual codegen obtains the intrinsic from the result type and the types |
| // of Ops[1] and Ops[2] (unmasked) or Ops[2] and Ops[4] (masked) -- presumably |
| // the index and VL operands -- and bitcasts the pointer operand (Ops[0] |
| // unmasked, Ops[1] masked) to a pointer to the result type. |
| multiclass RVVIndexedLoad<string op> { |
| let ManualCodegen = [{ |
| IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()}; |
| Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); |
| }], |
| ManualCodegenMask = [{ |
| IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()}; |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); |
| }] in { |
| foreach type = TypeList in { |
| foreach eew_list = EEWList in { |
| defvar eew = eew_list[0]; |
| defvar eew_type = eew_list[1]; |
| let Name = op # eew # "_v", IRName = op, IRNameMask = op # "_mask" in { |
| def: RVVBuiltin<"v", "vPCe" # eew_type # "Uv", type>; |
| // Skip floating types for unsigned versions. |
| if !not(IsFloat<type>.val) then { |
| def: RVVBuiltin<"Uv", "UvPCUe" # eew_type # "Uv", type>; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Fault-only-first load builtins lowered to the vleff / vleff_mask intrinsics. |
| // The prototype carries an extra size_t pointer ("Pz") after the source |
| // pointer. For each type in `types` one builtin is defined, plus an unsigned |
| // variant when the type is not a floating-point one. The non-masked overloaded |
| // api is disabled. |
| // The manual codegen removes that extra pointer operand (Ops[1] unmasked, |
| // Ops[2] masked) from the operand list before calling the intrinsic, returns |
| // element 0 of the intrinsic's result and stores element 1 through the removed |
| // pointer, using the natural alignment of size_t. |
| multiclass RVVVLEFFBuiltin<list<string> types> { |
| let Name = NAME # "_v", |
| IRName = "vleff", |
| IRNameMask = "vleff_mask", |
| HasNoMaskedOverloaded = false, |
| ManualCodegen = [{ |
| { |
| IntrinsicTypes = {ResultType, Ops[2]->getType()}; |
| Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); |
| Value *NewVL = Ops[1]; |
| Ops.erase(Ops.begin() + 1); |
| llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); |
| llvm::Value *LoadValue = Builder.CreateCall(F, Ops, ""); |
| llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0}); |
| // Store new_vl. |
| clang::CharUnits Align = |
| CGM.getNaturalTypeAlignment(getContext().getSizeType()); |
| Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {1}), |
| Address(NewVL, Align)); |
| return V; |
| } |
| }], |
| ManualCodegenMask = [{ |
| { |
| IntrinsicTypes = {ResultType, Ops[4]->getType()}; |
| Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); |
| Value *NewVL = Ops[2]; |
| Ops.erase(Ops.begin() + 2); |
| llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); |
| llvm::Value *LoadValue = Builder.CreateCall(F, Ops, ""); |
| llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0}); |
| // Store new_vl. |
| clang::CharUnits Align = |
| CGM.getNaturalTypeAlignment(getContext().getSizeType()); |
| Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {1}), |
| Address(NewVL, Align)); |
| return V; |
| } |
| }] in { |
| foreach type = types in { |
| def : RVVBuiltin<"v", "vPCePz", type>; |
| // Skip floating types for unsigned versions. |
| if !not(IsFloat<type>.val) then { |
| def : RVVBuiltin<"Uv", "UvPCUePz", type>; |
| } |
| } |
| } |
| } |
| |
| // Unit-stride store builtins lowered to the vse / vse_mask intrinsics. They |
| // return void and have no maskedoff operand. For each type in `types` one |
| // builtin is defined, plus an unsigned variant when the type is not a |
| // floating-point one. |
| // The manual codegen bitcasts the pointer operand (Ops[1]) to a pointer to the |
| // type of the stored value (Ops[0]) and obtains the intrinsic from the value |
| // type and the type of Ops[2] (unmasked) or Ops[3] (masked) -- presumably the |
| // VL operand, which HasVL places last. |
| multiclass RVVVSEBuiltin<list<string> types> { |
| let Name = NAME # "_v", |
| IRName = "vse", |
| IRNameMask = "vse_mask", |
| HasMaskedOffOperand = false, |
| PermuteOperands = [1, 0], // C/C++ Operand: (ptr, value, vl). Builtin: (value, ptr, vl) |
| ManualCodegen = [{ |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); |
| IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType()}; |
| }], |
| ManualCodegenMask= [{ |
| Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); |
| IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()}; |
| }] in { |
| foreach type = types in { |
| def : RVVBuiltin<"v", "0vPe", type>; |
| // Skip floating types for unsigned versions. |
| if !not(IsFloat<type>.val) then { |
| def : RVVBuiltin<"Uv", "0UvPUe", type>; |
| } |
| } |
| } |
| } |
| |
| // 6. Configuration-Setting Instructions |
| // 6.1. vsetvli/vsetvl instructions |
| // Both builtins below have no VL parameter and no masked form, are marked as |
| // having side effects, and are instantiated once (Log2LMUL = [0], TypeRange |
| // "i"). Their intrinsic is obtained from the result type only. |
| let HasVL = false, |
| HasMask = false, |
| HasSideEffects = true, |
| Log2LMUL = [0], |
| ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type |
| { |
| // vsetvl is provided as macros because it requires constant integers for SEW |
| // and LMUL ("K" in the prototype). In the macros below the first constant |
| // selects the SEW (e8/e16/e32/e64 -> 0/1/2/3) and the second selects the LMUL |
| // (m1/m2/m4/m8 -> 0/1/2/3, mf8/mf4/mf2 -> 5/6/7). |
| let HeaderCode = |
| [{ |
| #define vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5) |
| #define vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6) |
| #define vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7) |
| #define vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0) |
| #define vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1) |
| #define vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2) |
| #define vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3) |
| |
| #define vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6) |
| #define vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7) |
| #define vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0) |
| #define vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1) |
| #define vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2) |
| #define vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3) |
| |
| #define vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7) |
| #define vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0) |
| #define vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1) |
| #define vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2) |
| #define vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3) |
| |
| #define vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0) |
| #define vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1) |
| #define vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2) |
| #define vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3) |
| |
| }] in |
| def vsetvli : RVVBuiltin<"", "zzKzKz", "i">; |
| |
| // vsetvlmax macros: same SEW/LMUL selectors as above, without the avl |
| // argument. |
| let HeaderCode = |
| [{ |
| #define vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5) |
| #define vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6) |
| #define vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7) |
| #define vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0) |
| #define vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1) |
| #define vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2) |
| #define vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3) |
| |
| #define vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6) |
| #define vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7) |
| #define vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0) |
| #define vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1) |
| #define vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2) |
| #define vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3) |
| |
| #define vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7) |
| #define vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0) |
| #define vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1) |
| #define vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2) |
| #define vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3) |
| |
| #define vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0) |
| #define vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1) |
| #define vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2) |
| #define vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3) |
| |
| }] in |
| def vsetvlimax : RVVBuiltin<"", "zKzKz", "i">; |
| } |
| |
| // 7. Vector Loads and Stores |
| // 7.4. Vector Unit-Stride Instructions |
| // Each defm is given the basic types whose element width matches the EEW in |
| // its name (8: c, 16: s, 32: i/f, 64: l/d). |
| defm vle8: RVVVLEBuiltin<["c"]>; |
| defm vle16: RVVVLEBuiltin<["s"]>; |
| defm vle32: RVVVLEBuiltin<["i","f"]>; |
| defm vle64: RVVVLEBuiltin<["l","d"]>; |
| |
| // Fault-only-first variants of the unit-stride loads. |
| defm vle8ff: RVVVLEFFBuiltin<["c"]>; |
| defm vle16ff: RVVVLEFFBuiltin<["s"]>; |
| defm vle32ff: RVVVLEFFBuiltin<["i", "f"]>; |
| defm vle64ff: RVVVLEFFBuiltin<["l", "d"]>; |
| |
| // Unit-stride stores. |
| defm vse8 : RVVVSEBuiltin<["c"]>; |
| defm vse16: RVVVSEBuiltin<["s"]>; |
| defm vse32: RVVVSEBuiltin<["i","f"]>; |
| defm vse64: RVVVSEBuiltin<["l","d"]>; |
| |
| // 7.6. Vector Indexed Instructions |
| // The defm is anonymous: RVVIndexedLoad builds every builtin name from its |
| // `op` argument. |
| defm : RVVIndexedLoad<"vluxei">; |
| defm : RVVIndexedLoad<"vloxei">; |
| |
| // 12. Vector Integer Arithmetic Instructions |
| // 12.1. Vector Single-Width Integer Add and Subtract |
| defm vadd : RVVIntBinBuiltinSet; |
| defm vsub : RVVIntBinBuiltinSet; |
| // vrsub only has the vector-scalar (_vx) form, signed and unsigned. |
| defm vrsub : RVVOutOp1BuiltinSet<"vrsub", "csil", |
| [["vx", "v", "vve"], |
| ["vx", "Uv", "UvUvUe"]]>; |
| |
| // 12.2. Vector Widening Integer Add/Subtract |
| // Each name is instantiated twice: once with narrow sources (_vv/_vx) and once |
| // with an already-widened first source (_wv/_wx, lowered to the "_w" |
| // intrinsic). |
| // Widening unsigned integer add/subtract, 2*SEW = SEW +/- SEW |
| defm vwaddu : RVVUnsignedWidenBinBuiltinSet; |
| defm vwsubu : RVVUnsignedWidenBinBuiltinSet; |
| // Widening signed integer add/subtract, 2*SEW = SEW +/- SEW |
| defm vwadd : RVVSignedWidenBinBuiltinSet; |
| defm vwsub : RVVSignedWidenBinBuiltinSet; |
| // Widening unsigned integer add/subtract, 2*SEW = 2*SEW +/- SEW |
| defm vwaddu : RVVUnsignedWidenOp0BinBuiltinSet; |
| defm vwsubu : RVVUnsignedWidenOp0BinBuiltinSet; |
| // Widening signed integer add/subtract, 2*SEW = 2*SEW +/- SEW |
| defm vwadd : RVVSignedWidenOp0BinBuiltinSet; |
| defm vwsub : RVVSignedWidenOp0BinBuiltinSet; |
| |
| // 12.3. Vector Integer Extension |
| // The result element is 2x ("w"), 4x ("q") or 8x ("o") as wide as the source |
| // element. The largest source Log2LMUL and the source type range shrink as the |
| // extension factor grows -- presumably so that the result type stays legal. |
| let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { |
| def vsext_vf2 : RVVIntExt<"vsext", "w", "wv", "csi">; |
| def vzext_vf2 : RVVIntExt<"vzext", "Uw", "UwUv", "csi">; |
| } |
| let Log2LMUL = [-3, -2, -1, 0, 1] in { |
| def vsext_vf4 : RVVIntExt<"vsext", "q", "qv", "cs">; |
| def vzext_vf4 : RVVIntExt<"vzext", "Uq", "UqUv", "cs">; |
| } |
| let Log2LMUL = [-3, -2, -1, 0] in { |
| def vsext_vf8 : RVVIntExt<"vsext", "o", "ov", "c">; |
| def vzext_vf8 : RVVIntExt<"vzext", "Uo", "UoUv", "c">; |
| } |
| |
| // 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions |
| // None of these builtins has a masked form. vmadc and vmsbc are each |
| // instantiated twice: once with a trailing mask operand (lowered to the |
| // *_carry_in / *_borrow_in intrinsic) and once without it. |
| let HasMask = false in { |
| defm vadc : RVVCarryinBuiltinSet; |
| defm vmadc : RVVCarryOutInBuiltinSet<"vmadc_carry_in">; |
| defm vmadc : RVVIntMaskOutBuiltinSet; |
| defm vsbc : RVVCarryinBuiltinSet; |
| defm vmsbc : RVVCarryOutInBuiltinSet<"vmsbc_borrow_in">; |
| defm vmsbc : RVVIntMaskOutBuiltinSet; |
| } |
| |
| // 12.5. Vector Bitwise Logical Instructions |
| defm vand : RVVIntBinBuiltinSet; |
| defm vxor : RVVIntBinBuiltinSet; |
| defm vor : RVVIntBinBuiltinSet; |
| |
| // 12.6. Vector Single-Width Bit Shift Instructions |
| // vsll is defined for signed and unsigned vectors; vsrl only for unsigned and |
| // vsra only for signed ones. |
| defm vsll : RVVShiftBuiltinSet; |
| defm vsrl : RVVUnsignedShiftBuiltinSet; |
| defm vsra : RVVSignedShiftBuiltinSet; |
| |
| // 12.7. Vector Narrowing Integer Right Shift Instructions |
| defm vnsrl : RVVUnsignedNShiftBuiltinSet; |
| defm vnsra : RVVSignedNShiftBuiltinSet; |
| |
| // 12.8. Vector Integer Comparison Instructions |
| defm vmseq : RVVIntMaskOutBuiltinSet; |
| defm vmsne : RVVIntMaskOutBuiltinSet; |
| defm vmsltu : RVVUnsignedMaskOutBuiltinSet; |
| defm vmslt : RVVSignedMaskOutBuiltinSet; |
| defm vmsleu : RVVUnsignedMaskOutBuiltinSet; |
| defm vmsle : RVVSignedMaskOutBuiltinSet; |
| // vmsgtu and vmsgt only have the vector-scalar (_vx) form here. |
| defm vmsgtu : RVVOp0Op1BuiltinSet<"vmsgtu", "csil", |
| [["vx", "Uvm", "mUvUe"]]>; |
| defm vmsgt : RVVOp0Op1BuiltinSet<"vmsgt", "csil", |
| [["vx", "vm", "mve"]]>; |
| |
| // 12.9. Vector Integer Min/Max Instructions |
| defm vminu : RVVUnsignedBinBuiltinSet; |
| defm vmin : RVVSignedBinBuiltinSet; |
| defm vmaxu : RVVUnsignedBinBuiltinSet; |
| defm vmax : RVVSignedBinBuiltinSet; |
| |
| // 12.10. Vector Single-Width Integer Multiply Instructions |
| defm vmul : RVVIntBinBuiltinSet; |
| defm vmulh : RVVSignedBinBuiltinSet; |
| defm vmulhu : RVVUnsignedBinBuiltinSet; |
| // vmulhsu: signed result and first operand, unsigned second operand. |
| defm vmulhsu : RVVOutOp1BuiltinSet<"vmulhsu", "csil", |
| [["vv", "v", "vvUv"], |
| ["vx", "v", "vvUe"]]>; |
| |
| // 12.11. Vector Integer Divide Instructions |
| defm vdivu : RVVUnsignedBinBuiltinSet; |
| defm vdiv : RVVSignedBinBuiltinSet; |
| defm vremu : RVVUnsignedBinBuiltinSet; |
| defm vrem : RVVSignedBinBuiltinSet; |
| |
| // 12.12. Vector Widening Integer Multiply Instructions |
| // Narrow sources, widened ("w") result; vwmulsu takes a signed first operand |
| // and an unsigned second operand. |
| let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { |
| defm vwmul : RVVOutOp0Op1BuiltinSet<"vwmul", "csi", |
| [["vv", "w", "wvv"], |
| ["vx", "w", "wve"]]>; |
| defm vwmulu : RVVOutOp0Op1BuiltinSet<"vwmulu", "csi", |
| [["vv", "Uw", "UwUvUv"], |
| ["vx", "Uw", "UwUvUe"]]>; |
| defm vwmulsu : RVVOutOp0Op1BuiltinSet<"vwmulsu", "csi", |
| [["vv", "w", "wvUv"], |
| ["vx", "w", "wvUe"]]>; |
| } |
| |
| // 12.13. Vector Single-Width Integer Multiply-Add Instructions |
| defm vmacc : RVVIntTerBuiltinSet; |
| defm vnmsac : RVVIntTerBuiltinSet; |
| defm vmadd : RVVIntTerBuiltinSet; |
| defm vnmsub : RVVIntTerBuiltinSet; |
| |
| // 12.14. Vector Widening Integer Multiply-Add Instructions |
| // The first operand and the result are the widened type; no maskedoff operand |
| // is used. vwmaccus only has the vector-scalar (_vx) form. |
| let HasMaskedOffOperand = false, |
| Log2LMUL = [-3, -2, -1, 0, 1, 2] in { |
| defm vwmaccu : RVVOutOp1Op2BuiltinSet<"vwmaccu", "csi", |
| [["vv", "Uw", "UwUwUvUv"], |
| ["vx", "Uw", "UwUwUeUv"]]>; |
| defm vwmacc : RVVOutOp1Op2BuiltinSet<"vwmacc", "csi", |
| [["vv", "w", "wwvv"], |
| ["vx", "w", "wwev"]]>; |
| defm vwmaccsu : RVVOutOp1Op2BuiltinSet<"vwmaccsu", "csi", |
| [["vv", "w", "wwvUv"], |
| ["vx", "w", "wweUv"]]>; |
| defm vwmaccus : RVVOutOp1Op2BuiltinSet<"vwmaccus", "csi", |
| [["vx", "w", "wwUev"]]>; |
| } |
| |
| // 12.15. Vector Integer Merge Instructions |
| // TODO |
| |
| // 12.16. Vector Integer Move Instructions |
| // TODO |
| |
| // 13. Vector Fixed-Point Arithmetic Instructions |
| // Names ending in "u" are instantiated with the unsigned sets, the others with |
| // the signed sets. |
| // 13.1. Vector Single-Width Saturating Add and Subtract |
| defm vsaddu : RVVUnsignedBinBuiltinSet; |
| defm vsadd : RVVSignedBinBuiltinSet; |
| defm vssubu : RVVUnsignedBinBuiltinSet; |
| defm vssub : RVVSignedBinBuiltinSet; |
| |
| // 13.2. Vector Single-Width Averaging Add and Subtract |
| defm vaaddu : RVVUnsignedBinBuiltinSet; |
| defm vaadd : RVVSignedBinBuiltinSet; |
| defm vasubu : RVVUnsignedBinBuiltinSet; |
| defm vasub : RVVSignedBinBuiltinSet; |
| |
| // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation |
| defm vsmul : RVVSignedBinBuiltinSet; |
| |
| // 13.4. Vector Single-Width Scaling Shift Instructions |
| // vssrl is only defined for unsigned vectors and vssra only for signed ones. |
| defm vssrl : RVVUnsignedShiftBuiltinSet; |
| defm vssra : RVVSignedShiftBuiltinSet; |
| |
| // 13.5. Vector Narrowing Fixed-Point Clip Instructions |
| defm vnclipu : RVVUnsignedNShiftBuiltinSet; |
| defm vnclip : RVVSignedNShiftBuiltinSet; |
| |
| // 14. Vector Floating-Point Instructions |
| // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions (vfrsub uses the BinVF set; presumably .vf forms only - confirm) |
| defm vfadd : RVVFloatingBinBuiltinSet; |
| defm vfsub : RVVFloatingBinBuiltinSet; |
| defm vfrsub : RVVFloatingBinVFBuiltinSet; |
| |
| // 14.3. Vector Widening Floating-Point Add/Subtract Instructions |
| // Widening FP add/subtract, 2*SEW = SEW +/- SEW |
| defm vfwadd : RVVFloatingWidenBinBuiltinSet; |
| defm vfwsub : RVVFloatingWidenBinBuiltinSet; |
| // Widening FP add/subtract, 2*SEW = 2*SEW +/- SEW (same defm names as above, expanded through a different multiclass) |
| defm vfwadd : RVVFloatingWidenOp0BinBuiltinSet; |
| defm vfwsub : RVVFloatingWidenOp0BinBuiltinSet; |
| |
| // 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions (vfrdiv uses the BinVF set, like vfrsub) |
| defm vfmul : RVVFloatingBinBuiltinSet; |
| defm vfdiv : RVVFloatingBinBuiltinSet; |
| defm vfrdiv : RVVFloatingBinVFBuiltinSet; |
| |
| // 14.5. Vector Widening Floating-Point Multiply |
| // Restricted to Log2LMUL -1..2 and to the "f" type range; the "vf" form |
| // takes an "e" element as its second operand. |
| let Log2LMUL = [-1, 0, 1, 2] in |
|   defm vfwmul : RVVOutOp0Op1BuiltinSet< |
|       "vfwmul", "f", [["vv", "w", "wvv"], ["vf", "w", "wve"]]>; |
| |
| // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions (all eight share RVVFloatingTerBuiltinSet) |
| defm vfmacc : RVVFloatingTerBuiltinSet; |
| defm vfnmacc : RVVFloatingTerBuiltinSet; |
| defm vfmsac : RVVFloatingTerBuiltinSet; |
| defm vfnmsac : RVVFloatingTerBuiltinSet; |
| defm vfmadd : RVVFloatingTerBuiltinSet; |
| defm vfnmadd : RVVFloatingTerBuiltinSet; |
| defm vfmsub : RVVFloatingTerBuiltinSet; |
| defm vfnmsub : RVVFloatingTerBuiltinSet; |
| |
| // 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions (all four share RVVFloatingWidenTerBuiltinSet) |
| defm vfwmacc : RVVFloatingWidenTerBuiltinSet; |
| defm vfwnmacc : RVVFloatingWidenTerBuiltinSet; |
| defm vfwmsac : RVVFloatingWidenTerBuiltinSet; |
| defm vfwnmsac : RVVFloatingWidenTerBuiltinSet; |
| |
| // 14.8. Vector Floating-Point Square-Root Instruction (a `def` of a class, not a `defm` multiclass expansion) |
| def vfsqrt : RVVFloatingUnaryVVBuiltin; |
| |
| // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction (same class as vfsqrt) |
| def vfrsqrt7 : RVVFloatingUnaryVVBuiltin; |
| |
| // 14.10. Vector Floating-Point Reciprocal Estimate Instruction (same class as vfsqrt) |
| def vfrec7 : RVVFloatingUnaryVVBuiltin; |
| |
| // 14.11. Vector Floating-Point MIN/MAX Instructions |
| defm vfmin : RVVFloatingBinBuiltinSet; |
| defm vfmax : RVVFloatingBinBuiltinSet; |
| |
| // 14.12. Vector Floating-Point Sign-Injection Instructions |
| defm vfsgnj : RVVFloatingBinBuiltinSet; |
| defm vfsgnjn : RVVFloatingBinBuiltinSet; |
| defm vfsgnjx : RVVFloatingBinBuiltinSet; |
| |
| // 14.13. Vector Floating-Point Compare Instructions (vmfgt/vmfge use the MaskOutVF set; presumably .vf forms only - confirm) |
| defm vmfeq : RVVFloatingMaskOutBuiltinSet; |
| defm vmfne : RVVFloatingMaskOutBuiltinSet; |
| defm vmflt : RVVFloatingMaskOutBuiltinSet; |
| defm vmfle : RVVFloatingMaskOutBuiltinSet; |
| defm vmfgt : RVVFloatingMaskOutVFBuiltinSet; |
| defm vmfge : RVVFloatingMaskOutVFBuiltinSet; |
| |
| // 14.14. Vector Floating-Point Classify Instruction |
| // The record is vfclass, but its Name field is overridden to "vfclass_v". |
| // The result prototype is "Uv" while the operand is a plain "v". |
| let Name = "vfclass_v" in { |
|   def vfclass : RVVOp0Builtin<"Uv", "Uvv", "fd">; |
| } |
| |
| // 14.15. Vector Floating-Point Merge Instruction |
| // vfmerge overrides Name to "vfmerge_vfm", clears HasMask, and sets |
| // PermuteOperands to [2, 0, 1]; its prototype "vvem" ends in an "m" operand. |
| let Name = "vfmerge_vfm", |
|     HasMask = false, |
|     PermuteOperands = [2, 0, 1] in { |
|   def vfmerge : RVVOutOp1Builtin<"v", "vvem", "fd">; |
| } |
| |
| // 14.16. Vector Floating-Point Move Instruction |
| // TODO |
| |
| // 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions |
| // TODO |
| |
| // 14.18. Widening Floating-Point/Integer Type-Convert Instructions |
| // TODO |
| |
| // 14.19. Narrowing Floating-Point/Integer Type-Convert Instructions |
| // TODO |