| //===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Defines MLIR vector operations. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef MLIR_DIALECT_VECTOR_IR_VECTOR_OPS |
| #define MLIR_DIALECT_VECTOR_IR_VECTOR_OPS |
| |
| include "mlir/Dialect/Arith/IR/ArithBase.td" |
| include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" |
| include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.td" |
| include "mlir/Dialect/Vector/Interfaces/MaskingOpInterface.td" |
| include "mlir/Dialect/Vector/IR/Vector.td" |
| include "mlir/Dialect/Vector/IR/VectorAttributes.td" |
| include "mlir/Interfaces/AlignmentAttrInterface.td" |
| include "mlir/Interfaces/ControlFlowInterfaces.td" |
| include "mlir/Interfaces/DestinationStyleOpInterface.td" |
| include "mlir/Interfaces/IndexingMapOpInterface.td" |
| include "mlir/Interfaces/InferIntRangeInterface.td" |
| include "mlir/Interfaces/InferTypeOpInterface.td" |
| include "mlir/Interfaces/MemOpInterfaces.td" |
| include "mlir/Interfaces/SideEffectInterfaces.td" |
| include "mlir/Interfaces/VectorInterfaces.td" |
| include "mlir/Interfaces/ViewLikeInterface.td" |
| include "mlir/IR/BuiltinAttributes.td" |
| include "mlir/IR/EnumAttr.td" |
| |
| // TODO: Add an attribute to specify a different algebra with operators other |
| // than the current set: {*, +}. |
| def Vector_ContractionOp : |
| Vector_Op<"contract", [ |
| IndexingMapOpInterface, |
| Pure, |
| PredOpTrait<"lhs and rhs have same element type", TCopVTEtIsSameAs<0, 1>>, |
| PredOpTrait<"third operand acc and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 2>>, |
| DeclareOpInterfaceMethods<MaskableOpInterface>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> |
| ]>, |
| Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyVectorOfNonZeroRank:$rhs, AnyType:$acc, |
| ArrayAttr:$indexing_maps, |
| Vector_IteratorTypeArrayAttr:$iterator_types, |
| DefaultValuedAttr<Vector_CombiningKindAttr, |
| "CombiningKind::ADD">:$kind)>, |
| Results<(outs AnyType)> { |
| let summary = "vector contraction operation"; |
| let description = [{ |
| Computes the sum of products of vector elements along contracting |
| dimension pairs from 2 vectors of rank M and N respectively, adds this |
| intermediate result to the accumulator argument of rank K, and returns a |
| vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims + |
| num_batch_dims (see dimension type descriptions below)). For K = 0 (no |
| free or batch dimensions), the accumulator and output are a scalar. |
| |
| If operands and the result have types of different bitwidths, operands are |
| promoted to have the same bitwidth as the result before performing the |
| contraction. For integer types, only signless integer types are supported, |
| and the promotion happens via sign extension. |
| |
| An iterator type attribute list must be specified, where each element of |
| the list represents an iterator with one of the following types: |
| |
| * "reduction": reduction dimensions are present in the lhs and rhs |
| arguments but not in the output (and accumulator |
| argument). These are the dimensions along which the vector |
| contraction op computes the sum of products, and |
| contracting dimension pair dimension sizes must match |
| between lhs/rhs. |
| |
| * "parallel": Batch dimensions are iterator type "parallel", and |
| are non-contracting dimensions present in the lhs, rhs and |
| output. The lhs/rhs co-iterate along the batch dimensions, |
| which should be expressed in their indexing maps. |
| |
| Free dimensions are iterator type "parallel", and are |
| non-contraction, non-batch dimensions accessed by either the |
| lhs or rhs (but not both). The lhs and rhs free dimensions |
| are unrelated to each other and do not co-iterate, which |
| should be expressed in their indexing maps. |
| |
| An indexing map attribute list must be specified with an entry for lhs, rhs |
| and acc arguments. An indexing map attribute specifies a mapping from each |
| iterator in the iterator type list, to each dimension of an N-D vector. |
| |
| An optional kind attribute may be used to specify the combining function |
| between the intermediate result and accumulator argument of rank K. This |
| attribute can take the values `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui` |
| /`and`/`or`/`xor` for integers, and `add`/`mul`/`minnumf`/`maxnumf` |
| /`minimumf`/`maximumf` for floats. The default is `add`. |
| |
| Example: |
| |
| ```mlir |
| // Simple DOT product (K = 0). |
| #contraction_accesses = [ |
| affine_map<(i) -> (i)>, |
| affine_map<(i) -> (i)>, |
| affine_map<(i) -> ()> |
| ] |
| #contraction_trait = { |
| indexing_maps = #contraction_accesses, |
| iterator_types = ["reduction"] |
| } |
| %3 = vector.contract #contraction_trait %0, %1, %2 |
| : vector<10xf32>, vector<10xf32> into f32 |
| |
| // 2D vector contraction with one contracting dimension (matmul, K = 2). |
| #contraction_accesses = [ |
| affine_map<(i, j, k) -> (i, k)>, |
| affine_map<(i, j, k) -> (k, j)>, |
| affine_map<(i, j, k) -> (i, j)> |
| ] |
| #contraction_trait = { |
| indexing_maps = #contraction_accesses, |
| iterator_types = ["parallel", "parallel", "reduction"] |
| } |
| |
| %3 = vector.contract #contraction_trait %0, %1, %2 |
| : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32> |
| |
| // 4D to 3D vector contraction with two contracting dimensions and |
| // one batch dimension (K = 3). |
| #contraction_accesses = [ |
| affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>, |
| affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>, |
| affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)> |
| ] |
| #contraction_trait = { |
| indexing_maps = #contraction_accesses, |
| iterator_types = ["parallel", "parallel", "parallel", |
| "reduction", "reduction"] |
| } |
| |
| %4 = vector.contract #contraction_trait %0, %1, %2 |
| : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32> |
| |
| // Vector contraction with mixed typed. lhs/rhs have different element |
| // types than accumulator/result. |
| %5 = vector.contract #contraction_trait %0, %1, %2 |
| : vector<10xf16>, vector<10xf16> into f32 |
| |
| // Contract with max (K = 0). |
| #contraction_accesses = [ |
| affine_map<(i) -> (i)>, |
| affine_map<(i) -> (i)>, |
| affine_map<(i) -> ()> |
| ] |
| #contraction_trait = { |
| indexing_maps = #contraction_accesses, |
| iterator_types = ["reduction"], |
| kind = #vector.kind<maxnumf> |
| } |
| %6 = vector.contract #contraction_trait %0, %1, %2 |
| : vector<10xf32>, vector<10xf32> into f32 |
| ``` |
| }]; |
| let builders = [ |
| OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, |
| "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes)>, |
| OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, |
| "ArrayRef<ArrayRef<AffineExpr>>":$indexingExprs, |
| "ArrayRef<IteratorType>":$iteratorTypes)>, |
| OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, |
| "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes, |
| "CombiningKind":$kind)> |
| ]; |
| let extraClassDeclaration = [{ |
| VectorType getLhsType() { |
| return ::llvm::cast<VectorType>(getLhs().getType()); |
| } |
| VectorType getRhsType() { |
| return ::llvm::cast<VectorType>(getRhs().getType()); |
| } |
| Type getAccType() { return getAcc().getType(); } |
| Type getResultType() { return getResult().getType(); } |
| SmallVector<StringRef> getTraitAttrNames(); |
| static unsigned getAccOperandIndex() { return 2; } |
| |
| llvm::SmallVector<::mlir::AffineMap, 4> getIndexingMapsArray() { |
| return llvm::to_vector<4>(getIndexingMaps().getAsValueRange<::mlir::AffineMapAttr>()); |
| } |
| |
| // Returns the bounds of each dimension in the iteration space spanned |
| // by the iterator types of this operation. |
| void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds); |
| |
| // Returns a list of index maps, where there is a list entry for each |
| // op indexing map attribute (i.e. one for each input and output, with |
| // the output listed last). Each index map, maps from this operations |
| // iteration space, to vector dimensions of the maps input/output. |
| void getIterationIndexMap( |
| std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap); |
| |
| std::vector<std::pair<int64_t, int64_t>> getContractingDimMap(); |
| std::vector<std::pair<int64_t, int64_t>> getBatchDimMap(); |
| |
| static CombiningKind getDefaultKind() { |
| return CombiningKind::ADD; |
| } |
| |
| SmallVector<IteratorType> getIteratorTypesArray() { |
| auto range = |
| getIteratorTypes() |
| .template getAsValueRange<IteratorTypeAttr, IteratorType>(); |
| return {range.begin(), range.end()}; |
| } |
| |
| //===------------------------------------------------------------------===// |
| // IndexingMapOpInterface interface methods implementation. |
| //===------------------------------------------------------------------===// |
| ArrayRef<int64_t> getShape(OpOperand * opOperand) { |
| Type t = opOperand->get().getType(); |
| if (auto vt = dyn_cast<VectorType>(t)) |
| return vt.getShape(); |
| return {}; |
| } |
| }]; |
| |
| let hasCanonicalizer = 1; |
| let hasCustomAssemblyFormat = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_ReductionOp : |
| Vector_Op<"reduction", [Pure, |
| PredOpTrait<"source operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>, |
| OptionalTypesMatchWith<"dest and acc have the same type", |
| "dest", "acc", "::llvm::cast<Type>($_self)">, |
| DeclareOpInterfaceMethods<ArithFastMathInterface>, |
| DeclareOpInterfaceMethods<MaskableOpInterface>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> |
| ]>, |
| Arguments<(ins Vector_CombiningKindAttr:$kind, |
| AnyVectorOfAnyRank:$vector, |
| Optional<AnyType>:$acc, |
| DefaultValuedAttr< |
| Arith_FastMathAttr, |
| "::mlir::arith::FastMathFlags::none">:$fastmath)>, |
| Results<(outs AnyType:$dest)> { |
| let summary = "reduction operation"; |
| let description = [{ |
| Reduces an 1-D vector "horizontally" into a scalar using the given |
| operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for |
| integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf`/`maximumf` for |
| floats. Reductions also allow an optional fused accumulator. |
| |
| Note that these operations are restricted to 1-D vectors to remain |
| close to the corresponding LLVM intrinsics: |
| |
| http://llvm.org/docs/LangRef.html#vector-reduction-intrinsics |
| |
| Example: |
| |
| ```mlir |
| %1 = vector.reduction <add>, %0 : vector<16xf32> into f32 |
| |
| %3 = vector.reduction <xor>, %2 : vector<4xi32> into i32 |
| |
| %4 = vector.reduction <mul>, %0, %1 : vector<16xf32> into f32 |
| ``` |
| }]; |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getVector().getType()); |
| } |
| }]; |
| let builders = [ |
| // Builder that infers the type of `dest`. |
| OpBuilder<(ins "CombiningKind":$kind, "Value":$vector, "Value":$acc, |
| CArg<"::mlir::arith::FastMathFlags", |
| "::mlir::arith::FastMathFlags::none">:$fastMathFlags)>, |
| // Builder that infers the type of `dest` and has no accumulator. |
| OpBuilder<(ins "CombiningKind":$kind, "Value":$vector, |
| CArg<"::mlir::arith::FastMathFlags", |
| "::mlir::arith::FastMathFlags::none">:$fastMathFlags)> |
| ]; |
| |
| let assemblyFormat = "$kind `,` $vector (`,` $acc^)? (`fastmath` `` $fastmath^)?" |
| " attr-dict `:` type($vector) `into` type($dest)"; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_MultiDimReductionOp : |
| Vector_Op<"multi_reduction", [Pure, |
| AllTypesMatch<["dest", "acc"]>, |
| PredOpTrait<"source operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>, |
| DeclareOpInterfaceMethods<InferTypeOpInterface>, |
| DeclareOpInterfaceMethods<MaskableOpInterface>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, |
| ["getShapeForUnroll"]>]>, |
| Arguments<(ins Vector_CombiningKindAttr:$kind, |
| AnyVectorOfNonZeroRank:$source, |
| AnyType:$acc, |
| DenseI64ArrayAttr:$reduction_dims)>, |
| Results<(outs AnyType:$dest)> { |
| let summary = "Multi-dimensional reduction operation"; |
| let description = [{ |
| Reduces an n-D vector into an (n-k)-D vector (or a scalar when k == n) |
| using the given operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui` |
| /`and`/`or`/`xor` for integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf` |
| /`maximumf` for floats. |
| Takes an initial accumulator operand. |
| |
| Example: |
| |
| ```mlir |
| %1 = vector.multi_reduction <add>, %0, %acc0 [1, 3] : |
| vector<4x8x16x32xf32> to vector<4x16xf32> |
| %2 = vector.multi_reduction <add>, %1, %acc1 [0, 1] : |
| vector<4x16xf32> to f32 |
| ``` |
| }]; |
| let builders = [ |
| OpBuilder<(ins "Value":$source, "Value":$acc, |
| "ArrayRef<bool>":$reductionMask, "CombiningKind":$kind)> |
| ]; |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| Type getDestType() { |
| return getDest().getType(); |
| } |
| |
| bool isReducedDim(int64_t d) { |
| assert(d >= 0 && d < static_cast<int64_t>(getReductionMask().size()) && |
| "d overflows the number of dims"); |
| return getReductionMask()[d]; |
| } |
| |
| SmallVector<bool> getReductionMask() { |
| SmallVector<bool> res(getSourceVectorType().getRank(), false); |
| for (int64_t dim : getReductionDims()) |
| res[dim] = true; |
| return res; |
| } |
| static SmallVector<bool> getReductionMask( |
| ArrayRef<int64_t> reductionDims, unsigned sourceRank) { |
| SmallVector<bool> res(sourceRank, false); |
| for (auto idx : reductionDims) |
| res[idx] = true; |
| return res; |
| } |
| }]; |
| let assemblyFormat = |
| "$kind `,` $source `,` $acc attr-dict $reduction_dims `:` type($source) `to` type($dest)"; |
| let hasFolder = 1; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_BroadcastOp : |
| Vector_Op<"broadcast", [Pure, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, |
| DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, |
| PredOpTrait<"source operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>]>, |
| Arguments<(ins AnyType:$source)>, |
| Results<(outs AnyVectorOfAnyRank:$vector)> { |
| let summary = "broadcast operation"; |
| let description = [{ |
| Broadcasts the scalar or k-D vector value in the source operand |
| to a n-D result vector such that the broadcast makes sense, i.e., |
| the source operand is duplicated to match the given rank and sizes |
| in the result vector. The legality rules are: |
| * the source operand must have the same element type as the result type |
| * a k-D vector <s_1 x .. x s_k x type> can be broadcast to |
| a n-D vector <t_1 x .. x t_n x type> if |
| * k <= n, and |
| * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n |
| match exactly as s_j = t_i or s_j = 1: |
| ``` |
| t_1 x .. t_n-k x t_n-k+1 x .. x t_i x .. x t_n |
| s_1 x .. x s_j x .. x s_k |
| <duplication> <potential stretch> |
| ``` |
| * in addition, any scalable unit dimension, `[1]`, must match exactly. |
| |
| The source operand is duplicated over all the missing leading dimensions |
| and stretched over the trailing dimensions where the source has a non-equal |
| dimension of 1 (stretching a trailing dimension is also referred to as |
| "dim-1" broadcasting). These rules imply that any scalar broadcast (k=0) to |
| any shaped vector with the same element type is always legal. |
| |
| Example: |
| |
| ```mlir |
| %0 = arith.constant 0.0 : f32 |
| %1 = vector.broadcast %0 : f32 to vector<16xf32> |
| %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32> |
| ``` |
| }]; |
| let extraClassDeclaration = [{ |
| Type getSourceType() { return getSource().getType(); } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getVector().getType()); |
| } |
| |
| /// Return the dimensions of the result vector that were formerly ones in the |
| /// source tensor and thus correspond to "dim-1" broadcasting. |
| llvm::SetVector<int64_t> computeBroadcastedUnitDims(); |
| |
| /// Broadcast `value` to a vector of `dstShape`, knowing that exactly the |
| /// `broadcastedDims` dimensions in the dstShape are broadcasted. |
| /// This requires (and asserts) that the broadcast is free of "dim-1" |
| /// broadcasting. |
| /// Since vector.broadcast only allows expanding leading dimensions, an extra |
| /// vector.transpose may be inserted to make the broadcast possible. |
| /// `value`, `dstShape` and `broadcastedDims` must be properly specified or |
| /// the helper will assert. This means: |
| /// 1. `dstShape` must not be empty. |
| /// 2. `broadcastedDims` must be confined to [0 .. rank(value.getResultVectorType)] |
| /// 2. `dstShape` trimmed of the dimensions specified in `broadcastedDims` |
| // must match the `value` shape. |
| static Value createOrFoldBroadcastOp( |
| OpBuilder &b, Value value, |
| ArrayRef<int64_t> dstShape, |
| const llvm::SetVector<int64_t> &broadcastedDims); |
| }]; |
| let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)"; |
| let hasFolder = 1; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_ShuffleOp |
| : Vector_Op< |
| "shuffle", |
| [Pure, |
| PredOpTrait<"first operand v1 and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>, |
| PredOpTrait<"second operand v2 and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 1>>, |
| InferTypeOpAdaptor]>, |
| Arguments<(ins AnyFixedVectorOfAnyRank:$v1, AnyFixedVectorOfAnyRank:$v2, |
| DenseI64ArrayAttr:$mask)>, |
| Results<(outs AnyVectorOfNonZeroRank:$vector)> { |
| let summary = "shuffle operation"; |
| let description = [{ |
| The shuffle operation constructs a permutation (or duplication) of elements |
| from two input vectors, returning a vector with the same element type as |
| the input and a length that is the same as the shuffle mask. The two input |
| vectors must have the same element type, same rank, and trailing dimension |
| sizes and shuffles their values in the leading dimension (which may differ |
| in size) according to the given mask. The legality rules are: |
| * the two operands must have the same element type as the result |
| - Either, the two operands and the result must have the same |
| rank and trailing dimension sizes, viz. given two k-D operands |
| v1 : <s_1 x s_2 x .. x s_k x type> and |
| v2 : <t_1 x t_2 x .. x t_k x type> |
| we have s_i = t_i for all 1 < i <= k |
| - Or, the two operands must be 0-D vectors and the result is a 1-D vector. |
| * the mask length equals the leading dimension size of the result |
| * numbering the input vector indices left to right across the operands, all |
| mask values must be within range, viz. given two k-D operands v1 and v2 |
| above, all mask values are in the range [0,s_1+t_1). The value `-1` |
| represents a poison mask value, which specifies that the selected element |
| is poison. |
| |
| Note, scalable vectors are not supported. |
| |
| Example: |
| |
| ```mlir |
| %0 = vector.shuffle %a, %b[0, 3] |
| : vector<2xf32>, vector<2xf32> ; yields vector<2xf32> |
| %1 = vector.shuffle %c, %b[0, 1, 2] |
| : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32> |
| %2 = vector.shuffle %a, %b[3, 2, 1, 0] |
| : vector<2xf32>, vector<2xf32> ; yields vector<4xf32> |
| %3 = vector.shuffle %a, %b[0, 1] |
| : vector<f32>, vector<f32> ; yields vector<2xf32> |
| %4 = vector.shuffle %a, %b[0, 4, -1, -1, -1, -1] |
| : vector<4xf32>, vector<4xf32> ; yields vector<6xf32> |
| ``` |
| }]; |
| |
| let extraClassDeclaration = extraPoisonClassDeclaration # [{ |
| VectorType getV1VectorType() { |
| return ::llvm::cast<VectorType>(getV1().getType()); |
| } |
| VectorType getV2VectorType() { |
| return ::llvm::cast<VectorType>(getV2().getType()); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getVector().getType()); |
| } |
| }]; |
| |
| let assemblyFormat = "operands $mask attr-dict `:` type(operands)"; |
| |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| let hasCanonicalizer = 1; |
| } |
| |
| def ResultIsDoubleSourceVectorType : TypesMatchWith< |
| "type of 'result' is double the width of the inputs", |
| "lhs", "result", |
| [{ |
| [&]() -> ::mlir::VectorType { |
| auto vectorType = ::llvm::cast<::mlir::VectorType>($_self); |
| ::mlir::VectorType::Builder builder(vectorType); |
| if (vectorType.getRank() == 0) { |
| static constexpr int64_t v2xTyShape[] = {2}; |
| return builder.setShape(v2xTyShape); |
| } |
| auto lastDim = vectorType.getRank() - 1; |
| return builder.setDim(lastDim, vectorType.getDimSize(lastDim) * 2); |
| }() |
| }]>; |
| |
| def Vector_InterleaveOp : |
| Vector_Op<"interleave", [Pure, AllTypesMatch<["lhs", "rhs"]>, |
| ResultIsDoubleSourceVectorType]> { |
| let summary = "constructs a vector by interleaving two input vectors"; |
| let description = [{ |
| The interleave operation constructs a new vector by interleaving the |
| elements from the trailing (or final) dimension of two input vectors, |
| returning a new vector where the trailing dimension is twice the size. |
| |
| Note that for the n-D case this differs from the interleaving possible with |
| `vector.shuffle`, which would only operate on the leading dimension. |
| |
| Another key difference is this operation supports scalable vectors, though |
| currently a general LLVM lowering is limited to the case where only the |
| trailing dimension is scalable. |
| |
| Example: |
| ```mlir |
| %a = arith.constant dense<[0, 1]> : vector<2xi32> |
| %b = arith.constant dense<[2, 3]> : vector<2xi32> |
| // The value of `%0` is `[0, 2, 1, 3]`. |
| %0 = vector.interleave %a, %b : vector<2xi32> -> vector<4xi32> |
| |
| // Examples showing allowed input and result types. |
| %1 = vector.interleave %c, %d : vector<f16> -> vector<2xf16> |
| %2 = vector.interleave %e, %f : vector<6x3xf32> -> vector<6x6xf32> |
| %3 = vector.interleave %g, %h : vector<[4]xi32> -> vector<[8]xi32> |
| %4 = vector.interleave %i, %j : vector<2x4x[2]xf64> -> vector<2x4x[4]xf64> |
| ``` |
| }]; |
| |
| let arguments = (ins AnyVectorOfAnyRank:$lhs, AnyVectorOfAnyRank:$rhs); |
| let results = (outs AnyVectorOfNonZeroRank:$result); |
| |
| let assemblyFormat = [{ |
| $lhs `,` $rhs attr-dict `:` type($lhs) `->` type($result) |
| }]; |
| |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getLhs().getType()); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| }]; |
| } |
| |
| class ResultIsHalfSourceVectorType<string result> : TypesMatchWith< |
| "the trailing dimension of the results is half the width of source trailing dimension", |
| "source", result, |
| [{ |
| [&]() -> ::mlir::VectorType { |
| auto vectorType = ::llvm::cast<mlir::VectorType>($_self); |
| ::mlir::VectorType::Builder builder(vectorType); |
| auto lastDim = vectorType.getRank() - 1; |
| auto newDimSize = vectorType.getDimSize(lastDim) / 2;; |
| if (newDimSize <= 0) |
| return vectorType; // (invalid input type) |
| return builder.setDim(lastDim, newDimSize); |
| }() |
| }] |
| >; |
| |
| def SourceVectorEvenElementCount : PredOpTrait< |
| "the trailing dimension of the source vector has an even number of elements", |
| CPred<[{ |
| [&](){ |
| auto srcVec = getSourceVectorType(); |
| return srcVec.getDimSize(srcVec.getRank() - 1) % 2 == 0; |
| }() |
| }]> |
| >; |
| |
| def Vector_DeinterleaveOp : |
| Vector_Op<"deinterleave", [Pure, |
| SourceVectorEvenElementCount, |
| ResultIsHalfSourceVectorType<"res1">, |
| AllTypesMatch<["res1", "res2"]> |
| ]> { |
| let summary = "constructs two vectors by deinterleaving an input vector"; |
| let description = [{ |
| The deinterleave operation constructs two vectors from a single input |
| vector. The first result vector contains the elements from even indexes |
| of the input, and the second contains elements from odd indexes. This is |
| the inverse of a `vector.interleave` operation. |
| |
| Each output's trailing dimension is half of the size of the input |
| vector's trailing dimension. This operation requires the input vector |
| to have a rank > 0 and an even number of elements in its trailing |
| dimension. |
| |
| The operation supports scalable vectors. |
| |
| Example: |
| ```mlir |
| %0, %1 = vector.deinterleave %a |
| : vector<8xi8> -> vector<4xi8> |
| %2, %3 = vector.deinterleave %b |
| : vector<2x8xi8> -> vector<2x4xi8> |
| %4, %5 = vector.deinterleave %c |
| : vector<2x8x4xi8> -> vector<2x8x2xi8> |
| %6, %7 = vector.deinterleave %d |
| : vector<[8]xf32> -> vector<[4]xf32> |
| %8, %9 = vector.deinterleave %e |
| : vector<2x[6]xf64> -> vector<2x[3]xf64> |
| %10, %11 = vector.deinterleave %f |
| : vector<2x4x[6]xf64> -> vector<2x4x[3]xf64> |
| ``` |
| }]; |
| |
| let arguments = (ins AnyVectorOfNonZeroRank:$source); |
| let results = (outs AnyVectorOfNonZeroRank:$res1, AnyVectorOfNonZeroRank:$res2); |
| |
| let assemblyFormat = [{ |
| $source attr-dict `:` type($source) `->` type($res1) |
| }]; |
| |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getRes1().getType()); |
| } |
| }]; |
| } |
| |
| def Vector_ExtractOp : |
| Vector_Op<"extract", [Pure, |
| DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, |
| PredOpTrait<"operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>, |
| InferTypeOpAdaptorWithIsCompatible]> { |
| let summary = "extract operation"; |
| let description = [{ |
| Extracts an (n − k)-D result sub-vector from an n-D source vector at a |
| specified k-D position. When n = k, the result degenerates to a scalar |
| element. |
| |
| Static and dynamic indices must be greater or equal to zero and less than |
| the size of the corresponding dimension. The result is undefined if any |
| index is out-of-bounds. The value `-1` represents a poison index, which |
| specifies that the extracted element is poison. |
| |
| Example: |
| |
| ```mlir |
| %1 = vector.extract %0[3]: vector<8x16xf32> from vector<4x8x16xf32> |
| %2 = vector.extract %0[2, 1, 3]: f32 from vector<4x8x16xf32> |
| %4 = vector.extract %0[%a, %b, %c]: f32 from vector<4x8x16xf32> |
| %5 = vector.extract %0[2, %b]: vector<16xf32> from vector<4x8x16xf32> |
| %6 = vector.extract %10[-1, %c]: f32 from vector<4x16xf32> |
| ``` |
| }]; |
| |
| let arguments = (ins |
| AnyVectorOfAnyRank:$source, |
| Variadic<Index>:$dynamic_position, |
| DenseI64ArrayAttr:$static_position |
| ); |
| let results = (outs AnyType:$result); |
| |
| let builders = [ |
| // Builder to extract a scalar from a rank-0 vector. |
| OpBuilder<(ins "Value":$source)>, |
| OpBuilder<(ins "Value":$source, "int64_t":$position)>, |
| OpBuilder<(ins "Value":$source, "OpFoldResult":$position)>, |
| OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$position)>, |
| OpBuilder<(ins "Value":$source, "ArrayRef<OpFoldResult>":$position)>, |
| ]; |
| |
| let extraClassDeclaration = extraPoisonClassDeclaration # [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| |
| /// Return a vector with all the static and dynamic position indices. |
| SmallVector<OpFoldResult> getMixedPosition() { |
| OpBuilder builder(getContext()); |
| return getMixedValues(getStaticPosition(), getDynamicPosition(), builder); |
| } |
| |
| unsigned getNumIndices() { |
| return getStaticPosition().size(); |
| } |
| |
| /// Return "true" if the op has at least one dynamic position. |
| bool hasDynamicPosition() { |
| return !getDynamicPosition().empty(); |
| } |
| |
| /// Wrapper for getSource, which replaced getVector. |
| [[deprecated("Use getSource instead!")]] ::mlir::Value getVector() { |
| return getSource(); |
| } |
| }]; |
| |
| let assemblyFormat = [{ |
| $source `` |
| custom<DynamicIndexList>($dynamic_position, $static_position) |
| attr-dict `:` type($result) `from` type($source) |
| }]; |
| |
| let hasCanonicalizer = 1; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_FMAOp : |
| Op<Vector_Dialect, "fma", [ |
| Pure, AllTypesMatch<["lhs", "rhs", "acc", "result"]>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> |
| ] # ElementwiseMappable.traits>, |
| Arguments<(ins VectorOfAnyRankOf<[AnyFloat]>:$lhs, |
| VectorOfAnyRankOf<[AnyFloat]>:$rhs, |
| VectorOfAnyRankOf<[AnyFloat]>:$acc)>, |
| Results<(outs VectorOfAnyRankOf<[AnyFloat]>:$result)> { |
| let summary = "vector fused multiply-add"; |
| let description = [{ |
| Multiply-add expressions operate on n-D vectors and compute a fused |
| pointwise multiply-and-accumulate: `$result = $lhs * $rhs + $acc`. |
| All operands and result have the same vector type. The semantics |
| of the operation correspond to those of the `llvm.fma` |
| [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the |
| particular case of lowering to LLVM, this is guaranteed to lower |
| to the `llvm.fma.*` intrinsic. |
| |
| Example: |
| |
| ```mlir |
| %3 = vector.fma %0, %1, %2: vector<8x16xf32> |
| ``` |
| }]; |
| let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)"; |
| let extraClassDeclaration = [{ |
| VectorType getVectorType() { return ::llvm::cast<VectorType>(getLhs().getType()); } |
| }]; |
| } |
| |
| def Vector_ToElementsOp : Vector_Op<"to_elements", [ |
| InferTypeOpAdaptor, Pure, |
| ShapedTypeMatchesElementCountAndTypes<"source", "elements">]> { |
| let summary = "operation that decomposes a vector into all its scalar elements"; |
| let description = [{ |
| This operation decomposes all the scalar elements from a vector. The |
| decomposed scalar elements are returned in row-major order. The number of |
| scalar results must match the number of elements in the input vector type. |
| All the result elements have the same result type, which must match the |
| element type of the input vector. Scalable vectors are not supported. |
| |
| Examples: |
| |
| ```mlir |
| // Decompose a 0-D vector. |
| %0 = vector.to_elements %v0 : vector<f32> |
| // %0 = %v0[0] |
| |
| // Decompose a 1-D vector. |
| %0:2 = vector.to_elements %v1 : vector<2xf32> |
| // %0#0 = %v1[0] |
| // %0#1 = %v1[1] |
| |
| // Decompose a 2-D. |
| %0:6 = vector.to_elements %v2 : vector<2x3xf32> |
| // %0#0 = %v2[0, 0] |
| // %0#1 = %v2[0, 1] |
| // %0#2 = %v2[0, 2] |
| // %0#3 = %v2[1, 0] |
| // %0#4 = %v2[1, 1] |
| // %0#5 = %v2[1, 2] |
| |
| // Decompose a 3-D vector. |
| %0:6 = vector.to_elements %v3 : vector<3x1x2xf32> |
| // %0#0 = %v3[0, 0, 0] |
| // %0#1 = %v3[0, 0, 1] |
| // %0#2 = %v3[1, 0, 0] |
| // %0#3 = %v3[1, 0, 1] |
| // %0#4 = %v3[2, 0, 0] |
| // %0#5 = %v3[2, 0, 1] |
| ``` |
| }]; |
| |
| let arguments = (ins AnyVectorOfAnyRank:$source); |
| let results = (outs Variadic<AnyType>:$elements); |
| let assemblyFormat = "$source attr-dict `:` type($source)"; |
| let hasFolder = 1; |
| let hasCanonicalizer = 1; |
| } |
| |
| def Vector_FromElementsOp : Vector_Op<"from_elements", [ |
| Pure, |
| ShapedTypeMatchesElementCountAndTypes<"dest", "elements">]> { |
| let summary = "operation that defines a vector from scalar elements"; |
| let description = [{ |
| This operation defines a vector from one or multiple scalar elements. The |
| scalar elements are arranged in row-major within the vector. The number of |
| elements must match the number of elements in the result type. All elements |
| must have the same type, which must match the element type of the result |
| vector type. Scalable vectors are not supported. |
| |
| Examples: |
| |
| ```mlir |
| // Define a 0-D vector. |
| %0 = vector.from_elements %f1 : vector<f32> |
| // [%f1] |
| |
| // Define a 1-D vector. |
| %1 = vector.from_elements %f1, %f2 : vector<2xf32> |
| // [%f1, %f2] |
| |
| // Define a 2-D vector. |
| %2 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<2x3xf32> |
| // [[%f1, %f2, %f3], [%f4, %f5, %f6]] |
| |
| // Define a 3-D vector. |
| %3 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<3x1x2xf32> |
| // [[[%f1, %f2]], [[%f3, %f4]], [[%f5, %f6]]] |
| ``` |
| }]; |
| |
| let arguments = (ins Variadic<AnyType>:$elements); |
| let results = (outs AnyFixedVectorOfAnyRank:$dest); |
| let assemblyFormat = "$elements attr-dict `:` type($dest)"; |
| let hasFolder = 1; |
| let hasCanonicalizer = 1; |
| } |
| |
| def Vector_InsertOp : |
| Vector_Op<"insert", [Pure, |
| DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, |
| PredOpTrait<"source operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>, |
| AllTypesMatch<["dest", "result"]>]> { |
| let summary = "insert operation"; |
| let description = [{ |
| Inserts an (n - k)-D sub-vector (value-to-store) into an n-D destination |
| vector at a specified k-D position. When n = 0, value-to-store degenerates |
| to a scalar element inserted into the n-D destination vector. |
| |
| Static and dynamic indices must be greater or equal to zero and less than |
| the size of the corresponding dimension. The result is undefined if any |
| index is out-of-bounds. The value `-1` represents a poison index, which |
| specifies that the resulting vector is poison. |
| |
| Example: |
| |
| ```mlir |
| %2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32> |
| %5 = vector.insert %3, %4[2, 1, 3] : f32 into vector<4x8x16xf32> |
| %11 = vector.insert %9, %10[%a, %b, %c] : f32 into vector<4x8x16xf32> |
| %12 = vector.insert %4, %10[2, %b] : vector<16xf32> into vector<4x8x16xf32> |
| %13 = vector.insert %20, %1[-1, %c] : f32 into vector<4x16xf32> |
| ``` |
| }]; |
| |
| let arguments = (ins |
| AnyType:$valueToStore, |
| AnyVectorOfAnyRank:$dest, |
| Variadic<Index>:$dynamic_position, |
| DenseI64ArrayAttr:$static_position |
| ); |
| let results = (outs AnyVectorOfAnyRank:$result); |
| |
| let builders = [ |
| // Builder to insert a scalar/rank-0 vector into a rank-0 vector. |
| OpBuilder<(ins "Value":$valueToStore, "Value":$dest)>, |
| OpBuilder<(ins "Value":$valueToStore, "Value":$dest, "int64_t":$position)>, |
| OpBuilder<(ins "Value":$valueToStore, "Value":$dest, "OpFoldResult":$position)>, |
| OpBuilder<(ins "Value":$valueToStore, "Value":$dest, "ArrayRef<int64_t>":$position)>, |
| OpBuilder<(ins "Value":$valueToStore, "Value":$dest, "ArrayRef<OpFoldResult>":$position)>, |
| ]; |
| |
| let extraClassDeclaration = extraPoisonClassDeclaration # [{ |
| Type getValueToStoreType() { return getValueToStore().getType(); } |
| VectorType getDestVectorType() { |
| return ::llvm::cast<VectorType>(getDest().getType()); |
| } |
| |
| /// Return a vector with all the static and dynamic position indices. |
| SmallVector<OpFoldResult> getMixedPosition() { |
| OpBuilder builder(getContext()); |
| return getMixedValues(getStaticPosition(), getDynamicPosition(), builder); |
| } |
| |
| unsigned getNumIndices() { |
| return getStaticPosition().size(); |
| } |
| |
| bool hasDynamicPosition() { |
| return llvm::any_of(getDynamicPosition(), |
| [](Value operand) { return operand != nullptr; }); |
| } |
| }]; |
| |
| let assemblyFormat = [{ |
| $valueToStore `,` $dest custom<DynamicIndexList>($dynamic_position, $static_position) |
| attr-dict `:` type($valueToStore) `into` type($dest) |
| }]; |
| |
| let hasCanonicalizer = 1; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_ScalableInsertOp : |
| Vector_Op<"scalable.insert", [Pure, |
| AllElementTypesMatch<["valueToStore", "dest"]>, |
| AllTypesMatch<["dest", "result"]>, |
| PredOpTrait<"position is a multiple of the source length.", |
| CPred< |
| "(getPos() % getSourceVectorType().getNumElements()) == 0" |
| >>]>, |
| Arguments<(ins VectorOfRank<[1]>:$valueToStore, |
| ScalableVectorOfRank<[1]>:$dest, |
| I64Attr:$pos)>, |
| Results<(outs ScalableVectorOfRank<[1]>:$result)> { |
| let summary = "insert subvector into scalable vector operation"; |
| // NOTE: This operation is designed to map to `llvm.vector.insert`, and its |
| // documentation should be kept aligned with LLVM IR: |
| // https://llvm.org/docs/LangRef.html#llvm-vector-insert-intrinsic |
| let description = [{ |
| This operations takes a rank-1 fixed-length or scalable subvector and |
| inserts it within the destination scalable vector starting from the |
| position specificed by `pos`. If the source vector is scalable, the |
| insertion position will be scaled by the runtime scaling factor of the |
| source subvector. |
| |
| The insertion position must be a multiple of the minimum size of the source |
| vector. For the operation to be well defined, the source vector must fit in |
| the destination vector from the specified position. Since the destination |
| vector is scalable and its runtime length is unknown, the validity of the |
| operation can't be verified nor guaranteed at compile time. |
| |
| Example: |
| |
| ```mlir |
| %2 = vector.scalable.insert %0, %1[8] : vector<4xf32> into vector<[16]xf32> |
| %5 = vector.scalable.insert %3, %4[0] : vector<8xf32> into vector<[4]xf32> |
| %8 = vector.scalable.insert %6, %7[0] : vector<[4]xf32> into vector<[8]xf32> |
| ``` |
| |
| Invalid example: |
| ```mlir |
| %2 = vector.scalable.insert %0, %1[5] : vector<4xf32> into vector<[16]xf32> |
| ``` |
| }]; |
| |
| let assemblyFormat = [{ |
| $valueToStore `,` $dest `[` $pos `]` attr-dict `:` type($valueToStore) `into` type($dest) |
| }]; |
| |
| let extraClassDeclaration = extraPoisonClassDeclaration # [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getValueToStore().getType()); |
| } |
| VectorType getDestVectorType() { |
| return ::llvm::cast<VectorType>(getDest().getType()); |
| } |
| }]; |
| } |
| |
| def Vector_ScalableExtractOp : |
| Vector_Op<"scalable.extract", [Pure, |
| AllElementTypesMatch<["source", "result"]>, |
| PredOpTrait<"position is a multiple of the result length.", |
| CPred< |
| "(getPos() % getResultVectorType().getNumElements()) == 0" |
| >>]>, |
| Arguments<(ins ScalableVectorOfRank<[1]>:$source, |
| I64Attr:$pos)>, |
| Results<(outs VectorOfRank<[1]>:$result)> { |
| let summary = "extract subvector from scalable vector operation"; |
| // NOTE: This operation is designed to map to `llvm.vector.extract`, and its |
| // documentation should be kept aligned with LLVM IR: |
| // https://llvm.org/docs/LangRef.html#llvm-vector-extract-intrinsic |
| let description = [{ |
| Takes rank-1 source vector and a position `pos` within the source |
| vector, and extracts a subvector starting from that position. |
| |
| The extraction position must be a multiple of the minimum size of the result |
| vector. For the operation to be well defined, the destination vector must |
| fit within the source vector from the specified position. Since the source |
| vector is scalable and its runtime length is unknown, the validity of the |
| operation can't be verified nor guaranteed at compile time. |
| |
| Example: |
| |
| ```mlir |
| %1 = vector.scalable.extract %0[8] : vector<4xf32> from vector<[8]xf32> |
| %3 = vector.scalable.extract %2[0] : vector<[4]xf32> from vector<[8]xf32> |
| ``` |
| |
| Invalid example: |
| ```mlir |
| %1 = vector.scalable.extract %0[5] : vector<4xf32> from vector<[16]xf32> |
| ``` |
| }]; |
| |
| let assemblyFormat = [{ |
| $source `[` $pos `]` attr-dict `:` type($result) `from` type($source) |
| }]; |
| |
| let extraClassDeclaration = extraPoisonClassDeclaration # [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| /// Wrapper for getSource, which replaced getVector. |
| [[deprecated("Use getSource instead!")]] ::mlir::Value getVector() { |
| return getSource(); |
| } |
| }]; |
| } |
| |
| def Vector_InsertStridedSliceOp : |
| Vector_Op<"insert_strided_slice", [Pure, |
| PredOpTrait<"operand #0 and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>, |
| AllTypesMatch<["dest", "result"]>]>, |
| Arguments<(ins AnyVectorOfNonZeroRank:$valueToStore, AnyVectorOfNonZeroRank:$dest, I64ArrayAttr:$offsets, |
| I64ArrayAttr:$strides)>, |
| Results<(outs AnyVectorOfNonZeroRank:$result)> { |
| let summary = "strided_slice operation"; |
| let description = [{ |
| Takes a k-D valueToStore vector, an n-D destination vector (n >= k), n-sized |
| `offsets` integer array attribute, a k-sized `strides` integer array attribute |
| and inserts the k-D valueToStore vector as a strided subvector at the proper offset |
| into the n-D destination vector. |
| |
| At the moment strides must contain only 1s. |
| |
| Returns an n-D vector that is a copy of the n-D destination vector in which |
| the last k-D dimensions contain the k-D valueToStore vector elements strided at |
| the proper location as specified by the offsets. |
| |
| Example: |
| |
| ```mlir |
| %2 = vector.insert_strided_slice %0, %1 |
| {offsets = [0, 0, 2], strides = [1, 1]}: |
| vector<2x4xf32> into vector<16x4x8xf32> |
| ``` |
| }]; |
| |
| let assemblyFormat = [{ |
| $valueToStore `,` $dest attr-dict `:` type($valueToStore) `into` type($dest) |
| }]; |
| |
| let builders = [ |
| OpBuilder<(ins "Value":$valueToStore, "Value":$dest, |
| "ArrayRef<int64_t>":$offsets, "ArrayRef<int64_t>":$strides)> |
| ]; |
| let extraClassDeclaration = [{ |
| // TODO: Rename |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getValueToStore().getType()); |
| } |
| VectorType getDestVectorType() { |
| return ::llvm::cast<VectorType>(getDest().getType()); |
| } |
| bool hasNonUnitStrides() { |
| return llvm::any_of(getStrides(), [](Attribute attr) { |
| return ::llvm::cast<IntegerAttr>(attr).getInt() != 1; |
| }); |
| } |
| }]; |
| |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| let hasCanonicalizer = 1; |
| } |
| |
| def Vector_OuterProductOp : |
| Vector_Op<"outerproduct", [Pure, |
| PredOpTrait<"lhs operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>, |
| PredOpTrait<"rhs operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 1>>, |
| DeclareOpInterfaceMethods<MaskableOpInterface>]>, |
| Arguments<(ins AnyVectorOfNonZeroRank:$lhs, AnyType:$rhs, |
| Optional<AnyVectorOfNonZeroRank>:$acc, |
| DefaultValuedAttr<Vector_CombiningKindAttr, "CombiningKind::ADD">:$kind)>, |
| Results<(outs AnyVectorOfNonZeroRank)> { |
| let summary = "vector outerproduct with optional fused add"; |
| let description = [{ |
| Takes 2 1-D vectors and returns the 2-D vector containing the outer-product, |
| as illustrated below: |
| ``` |
| outer | [c, d] |
| ------+------------ |
| [a, | [ [a*c, a*d], |
| b] | [b*c, b*d] ] |
| ``` |
| This operation also accepts a 1-D vector lhs and a scalar rhs. In this |
| case a simple AXPY operation is performed, which returns a 1-D vector. |
| ``` |
| [a, b] * c = [a*c, b*c] |
| ``` |
| |
| An optional extra vector argument with the same shape as the output |
| vector may be specified in which case the operation returns the sum of |
| the outer-product and the extra vector. In this multiply-accumulate |
| scenario for floating-point arguments, the rounding mode is enforced |
| by guaranteeing that a fused-multiply add operation is emitted. When |
| lowered to the LLVMIR dialect, this form emits `llvm.intr.fma`, which |
| is guaranteed to lower to actual `fma` instructions on x86. |
| |
| An optional kind attribute may be specified to be: `add`/`mul`/`minsi` |
| /`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for integers, and `add`/`mul` |
| /`minnumf`/`maxnumf`/`minimumf`/`maximumf` for floats. The default is |
| `add`. |
| |
| Example: |
| |
| ``` |
| %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32> |
| return %2: vector<4x8xf32> |
| |
| %3 = vector.outerproduct %0, %1, %2: |
| vector<4xf32>, vector<8xf32>, vector<4x8xf32> |
| return %3: vector<4x8xf32> |
| |
| %4 = vector.outerproduct %0, %1, %2 {kind = #vector.kind<maxnumf>}: |
| vector<4xf32>, vector<8xf32>, vector<4x8xf32> |
| return %3: vector<4x8xf32> |
| |
| %6 = vector.outerproduct %4, %5: vector<10xf32>, f32 |
| return %6: vector<10xf32> |
| |
| ``` |
| }]; |
| let builders = [ |
| // Build an op without mask, use the type of `acc` as the return type. |
| OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc)> |
| ]; |
| let extraClassDeclaration = [{ |
| VectorType getOperandVectorTypeLHS() { |
| return ::llvm::cast<VectorType>(getLhs().getType()); |
| } |
| Type getOperandTypeRHS() { |
| return getRhs().getType(); |
| } |
| VectorType getOperandVectorTypeACC() { |
| return getAcc() |
| ? ::llvm::cast<VectorType>(getAcc().getType()) |
| : VectorType(); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| static CombiningKind getDefaultKind() { |
| return CombiningKind::ADD; |
| } |
| }]; |
| let hasCustomAssemblyFormat = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_ExtractStridedSliceOp : |
| Vector_Op<"extract_strided_slice", [Pure, |
| PredOpTrait<"operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>]>, |
| Arguments<(ins AnyVectorOfNonZeroRank:$source, I64ArrayAttr:$offsets, |
| I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>, |
| Results<(outs AnyVectorOfNonZeroRank)> { |
| let summary = "extract_strided_slice operation"; |
| let description = [{ |
| Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized |
| `sizes` integer array attribute, a k-sized `strides` integer array |
| attribute and extracts the n-D subvector at the proper offset. |
| |
| At the moment strides must contain only 1s. |
| |
| Returns an n-D vector where the first k-D dimensions match the `sizes` |
| attribute. The returned subvector contains the elements starting at offset |
| `offsets` and ending at `offsets + sizes`. |
| |
| Example: |
| |
| ```mlir |
| %1 = vector.extract_strided_slice %0 |
| {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}: |
| vector<4x8x16xf32> to vector<2x4x16xf32> |
| |
| // TODO: Evolve to a range form syntax similar to: |
| %1 = vector.extract_strided_slice %0[0:2:1][2:4:1] |
| vector<4x8x16xf32> to vector<2x4x16xf32> |
| ``` |
| |
| TODO: Implement support for poison indices. |
| }]; |
| let builders = [ |
| OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$offsets, |
| "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)> |
| ]; |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| void getOffsets(SmallVectorImpl<int64_t> &results); |
| bool hasNonUnitStrides() { |
| return llvm::any_of(getStrides(), [](Attribute attr) { |
| return ::llvm::cast<IntegerAttr>(attr).getInt() != 1; |
| }); |
| } |
| /// Wrapper for getSource, which replaced getVector. |
| [[deprecated("Use getSource instead!")]] ::mlir::Value getVector() { |
| return getSource(); |
| } |
| }]; |
| let hasCanonicalizer = 1; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| let assemblyFormat = "$source attr-dict `:` type($source) `to` type(results)"; |
| } |
| |
| // TODO: Tighten semantics so that masks and inbounds can't be used |
| // simultaneously within the same transfer op. |
| def Vector_TransferReadOp : |
| Vector_Op<"transfer_read", [ |
| DeclareOpInterfaceMethods<VectorTransferOpInterface>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, |
| DeclareOpInterfaceMethods<MaskableOpInterface>, |
| DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, |
| DeclareOpInterfaceMethods<ConditionallySpeculatable>, |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| AttrSizedOperandSegments, |
| DestinationStyleOpInterface |
| ]>, |
| Arguments<(ins AnyShaped:$base, |
| Variadic<Index>:$indices, |
| AffineMapAttr:$permutation_map, |
| AnyType:$padding, |
| Optional<VectorOfNonZeroRankOf<[I1]>>:$mask, |
| BoolArrayAttr:$in_bounds)>, |
| Results<(outs AnyVectorOfAnyRank:$vector)> { |
| |
| let summary = "Reads a supervector from memory into an SSA vector value."; |
| |
| let description = [{ |
| The `vector.transfer_read` op performs a read from a slice within a |
| [MemRef](../LangRef.md#memref-type) or a Ranked |
| [Tensor](../LangRef.md#tensor-type) supplied as its first operand |
| into a [vector](../LangRef.md#vector-type) of the same base elemental type. |
| |
| A memref/tensor operand with vector element type, must have its vector |
| element type match a suffix (shape and element type) of the vector (e.g. |
| memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>). |
| |
| The slice is further defined by a full-rank index within the MemRef/Tensor, |
| supplied as the operands `[1 .. 1 + rank(memref/tensor))` that defines the |
| starting point of the transfer (e.g. `%A[%i0, %i1, %i2]`). |
| |
| The permutation_map [attribute](../LangRef.md#attributes) is an |
| [affine-map](Affine.md#affine-maps) which specifies the transposition on the |
| slice to match the vector shape. The permutation map may be implicit and |
| omitted from parsing and printing if it is the canonical minor identity map |
| (i.e. if it does not permute or broadcast any dimension). |
| |
| The size of the slice is specified by the size of the vector, given as the |
| return type. |
| |
| An SSA value `padding` of the same elemental type as the MemRef/Tensor is |
| provided to specify a fallback value in the case of out-of-bounds accesses |
| and/or masking. |
| |
| An optional SSA value `mask` may be specified to mask out elements read from |
| the MemRef/Tensor. The `mask` type is an `i1` vector with a shape that |
| matches how elements are read from the MemRef/Tensor, *before* any |
| permutation or broadcasting. Elements whose corresponding mask element is |
| `0` are masked out and replaced with `padding`. |
| |
| For every vector dimension, the boolean array attribute `in_bounds` |
| specifies if the transfer is guaranteed to be within the source bounds. If |
| set to "false", accesses (including the starting point) may run |
| out-of-bounds along the respective vector dimension as the index increases. |
| Non-vector dimensions *must* always be in-bounds. The `in_bounds` array |
| length has to be equal to the vector rank. This attribute has a default |
| value: `false` (i.e. "out-of-bounds"). When skipped in the textual IR, the |
| default value is assumed. Similarly, the OP printer will omit this |
| attribute when all dimensions are out-of-bounds (i.e. the default value is |
| used). |
| |
| A `vector.transfer_read` can be lowered to a simple load if all dimensions |
| are specified to be within bounds and no `mask` was specified. |
| |
| This operation is called 'read' by opposition to 'load' because the |
| super-vector granularity is generally not representable with a single |
| hardware register. A `vector.transfer_read` is thus a mid-level abstraction |
| that supports super-vectorization with non-effecting padding for full-tile |
| only operations. |
| |
| More precisely, let's dive deeper into the permutation_map for the following |
| MLIR: |
| |
| ```mlir |
| vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4] |
| { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } : |
| memref<?x?x?x?xf32>, vector<3x4x5xf32> |
| ``` |
| |
| This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3, |
| %expr4]`. The size of the slice can be inferred from the resulting vector |
| shape and walking back through the permutation map: 3 along d2 and 5 along |
| d0, so the slice is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]` |
| |
| That slice needs to be read into a `vector<3x4x5xf32>`. Since the |
| permutation map is not full rank, there must be a broadcast along vector |
| dimension `1`. |
| |
| A notional lowering of vector.transfer_read could generate code resembling: |
| |
| ```mlir |
| // %expr1, %expr2, %expr3, %expr4 defined before this point |
| // alloc a temporary buffer for performing the "gather" of the slice. |
| %tmp = memref.alloc() : memref<vector<3x4x5xf32>> |
| for %i = 0 to 3 { |
| affine.for %j = 0 to 4 { |
| affine.for %k = 0 to 5 { |
| // Note that this load does not involve %j. |
| %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> |
| // Update the temporary gathered slice with the individual element |
| %slice = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> |
| %updated = vector.insert %a, %slice[%i, %j, %k] : f32 into vector<3x4x5xf32> |
| memref.store %updated, %tmp : memref<vector<3x4x5xf32>> |
| }}} |
| // At this point we gathered the elements from the original |
| // memref into the desired vector layout, stored in the `%tmp` allocation. |
| %vec = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> |
| ``` |
| |
| On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that |
| the temporary storage footprint could conceptually be only `3 * 5` values but |
| `3 * 4 * 5` values are actually transferred between `%A` and `%tmp`. |
| |
| Alternatively, if a notional vector broadcast operation were available, we |
| could avoid the loop on `%j` and the lowered code would resemble: |
| |
| ```mlir |
| // %expr1, %expr2, %expr3, %expr4 defined before this point |
| %tmp = memref.alloc() : memref<vector<3x4x5xf32>> |
| for %i = 0 to 3 { |
| affine.for %k = 0 to 5 { |
| %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> |
| %slice = memref.load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> |
| // Here we only store to the first element in dimension one |
| %updated = vector.insert %a, %slice[%i, 0, %k] : f32 into vector<3x4x5xf32> |
| memref.store %updated, %tmp : memref<vector<3x4x5xf32>> |
| }} |
| // At this point we gathered the elements from the original |
| // memref into the desired vector layout, stored in the `%tmp` allocation. |
| // However we haven't replicated them alongside the first dimension, we need |
| // to broadcast now. |
| %partialVec = load %tmp : memref<vector<3x4x5xf32>> -> vector<3x4x5xf32> |
| %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32> |
| ``` |
| |
| where `broadcast` broadcasts from element 0 to all others along the |
| specified dimension. This time, the number of loaded element is `3 * 5` |
| values. |
| An additional `1` broadcast is required. On a GPU this broadcast could be |
| implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`. |
| |
| Syntax |
| ``` |
| operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list |
| `{` attribute-entry `} :` memref-type `,` vector-type |
| ``` |
| |
| Example: |
| |
| ```mlir |
| // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> |
| // and pad with %f0 to handle the boundary case: |
| %f0 = arith.constant 0.0f : f32 |
| affine.for %i0 = 0 to %0 { |
| affine.for %i1 = 0 to %1 step 256 { |
| affine.for %i2 = 0 to %2 step 32 { |
| %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0) |
| {permutation_map: (d0, d1, d2) -> (d2, d1)} : |
| memref<?x?x?xf32>, vector<32x256xf32> |
| }}} |
| |
| // or equivalently (rewrite with vector.transpose) |
| %f0 = arith.constant 0.0f : f32 |
| affine.for %i0 = 0 to %0 { |
| affine.for %i1 = 0 to %1 step 256 { |
| affine.for %i2 = 0 to %2 step 32 { |
| %v0 = vector.transfer_read %A[%i0, %i1, %i2], (%f0) |
| {permutation_map: (d0, d1, d2) -> (d1, d2)} : |
| memref<?x?x?xf32>, vector<256x32xf32> |
| %v = vector.transpose %v0, [1, 0] : |
| vector<256x32xf32> to vector<32x256f32> |
| }}} |
| |
| // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into |
| // vector<128xf32>. The underlying implementation will require a 1-D vector |
| // broadcast: |
| affine.for %i0 = 0 to %0 { |
| affine.for %i1 = 0 to %1 { |
| %3 = vector.transfer_read %A[%i0, %i1] |
| {permutation_map: (d0, d1) -> (0)} : |
| memref<?x?xf32>, vector<128xf32> |
| } |
| } |
| |
| // Read from a memref with vector element type. |
| %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 |
| {permutation_map = (d0, d1)->(d0, d1)} |
| : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32> |
| |
| // Read from a tensor with vector element type. |
| %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 |
| {permutation_map = (d0, d1)->(d0, d1)} |
| : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32> |
| |
| // Special encoding for 0-d transfer with 0-d tensor/memref, vector shape |
| // {1} and permutation_map () -> (0). |
| %0 = vector.transfer_read %arg0[], %f0 {permutation_map = affine_map<()->(0)>} : |
| tensor<f32>, vector<1xf32> |
| ``` |
| }]; |
| |
| let builders = [ |
| /// 1. Builder that sets padding to `padding` or poison if not provided and |
| /// an empty mask (variant with attrs). |
| OpBuilder<(ins "VectorType":$vectorType, |
| "Value":$source, |
| "ValueRange":$indices, |
| "std::optional<Value>":$padding, |
| "AffineMapAttr":$permutationMapAttr, |
| "ArrayAttr":$inBoundsAttr)>, |
| /// 2. Builder that sets padding to `padding` or poison if not provided and |
| /// an empty mask (variant without attrs). |
| OpBuilder<(ins "VectorType":$vectorType, |
| "Value":$source, |
| "ValueRange":$indices, |
| "std::optional<Value>":$padding, |
| "AffineMap":$permutationMap, |
| CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, |
| /// 3. Builder that sets padding to `padding` or poison if not provided and |
| /// permutation map to 'getMinorIdentityMap'. |
| OpBuilder<(ins "VectorType":$vectorType, |
| "Value":$source, |
| "ValueRange":$indices, |
| "std::optional<Value>":$padding, |
| CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)> |
| ]; |
| |
| let extraClassDeclaration = [{ |
| // MaskableOpInterface methods. |
| bool supportsPassthru() { return true; } |
| |
| MutableOperandRange getDpsInitsMutable() { |
| return MutableOperandRange(getOperation(), /*start=*/0, /*length=*/0); |
| } |
| }]; |
| |
| let hasCanonicalizer = 1; |
| let hasCustomAssemblyFormat = 1; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| } |
| |
| // TODO: Tighten semantics so that masks and inbounds can't be used |
| // simultaneously within the same transfer op. |
| def Vector_TransferWriteOp : |
| Vector_Op<"transfer_write", [ |
| DeclareOpInterfaceMethods<VectorTransferOpInterface>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, |
| DeclareOpInterfaceMethods<MaskableOpInterface>, |
| DeclareOpInterfaceMethods<MemoryEffectsOpInterface>, |
| DeclareOpInterfaceMethods<ConditionallySpeculatable>, |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| AttrSizedOperandSegments, |
| DestinationStyleOpInterface |
| ]>, |
| Arguments<(ins AnyVectorOfAnyRank:$valueToStore, |
| AnyShaped:$base, |
| Variadic<Index>:$indices, |
| AffineMapAttr:$permutation_map, |
| Optional<VectorOfNonZeroRankOf<[I1]>>:$mask, |
| BoolArrayAttr:$in_bounds)>, |
| Results<(outs Optional<AnyRankedTensor>:$result)> { |
| |
| let summary = "The vector.transfer_write op writes a supervector to memory."; |
| |
| let description = [{ |
| The `vector.transfer_write` op performs a write from a |
| [vector](../LangRef.md#vector-type), supplied as its first operand, into a |
| slice within a [MemRef](../LangRef.md#memref-type) or a Ranked |
| [Tensor](../LangRef.md#tensor-type) of the same base elemental type, |
| supplied as its second operand. |
| |
| A vector memref/tensor operand must have its vector element type match a |
| suffix (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>, |
| vector<1x1x4x3xf32>). If the operand is a tensor, the operation returns a |
| new tensor of the same type. |
| |
| The slice is further defined by a full-rank index within the MemRef/Tensor, |
| supplied as the operands `[2 .. 2 + rank(memref/tensor))` that defines the |
| starting point of the transfer (e.g. `%A[%i0, %i1, %i2, %i3]`). |
| |
| The permutation_map [attribute](../LangRef.md#attributes) is an |
| [affine-map](Affine.md#affine-maps) which specifies the transposition on the |
| slice to match the vector shape. The permutation map may be implicit and |
| omitted from parsing and printing if it is the canonical minor identity map |
| (i.e. if it does not permute any dimension). In contrast to `transfer_read`, |
| write ops cannot have broadcast dimensions. |
| |
| The size of the slice is specified by the size of the vector. |
| |
| An optional SSA value `mask` may be specified to mask out elements written |
| to the MemRef/Tensor. The `mask` type is an `i1` vector with a shape that |
| matches how elements are written into the MemRef/Tensor, *after* applying |
| any permutation. Elements whose corresponding mask element is `0` are |
| masked out. |
| |
| For every vector dimension, the boolean array attribute `in_bounds` |
| specifies if the transfer is guaranteed to be within the source bounds. If |
| set to "false", accesses (including the starting point) may run |
| out-of-bounds along the respective vector dimension as the index increases. |
| Non-vector dimensions *must* always be in-bounds. The `in_bounds` array |
| length has to be equal to the vector rank. This attribute has a default |
| value: `false` (i.e. "out-of-bounds"). When skipped in the textual IR, the |
| default value is assumed. Similarly, the OP printer will omit this |
| attribute when all dimensions are out-of-bounds (i.e. the default value is |
| used). |
| |
| A `vector.transfer_write` can be lowered to a simple store if all |
| dimensions are specified to be within bounds and no `mask` was specified. |
| |
| This operation is called 'write' by opposition to 'store' because the |
| super-vector granularity is generally not representable with a single |
| hardware register. A `vector.transfer_write` is thus a |
| mid-level abstraction that supports super-vectorization with non-effecting |
| padding for full-tile-only code. It is the responsibility of |
| `vector.transfer_write`'s implementation to ensure the memory writes are |
| valid. Different lowerings may be pertinent depending on the hardware |
| support. |
| |
| Example: |
| |
| ```mlir |
| // write vector<16x32x64xf32> into the slice |
| // `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`: |
| for %i0 = 0 to %0 { |
| affine.for %i1 = 0 to %1 step 32 { |
| affine.for %i2 = 0 to %2 step 64 { |
| affine.for %i3 = 0 to %3 step 16 { |
| %val = `ssa-value` : vector<16x32x64xf32> |
| vector.transfer_write %val, %A[%i0, %i1, %i2, %i3] |
| {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : |
| vector<16x32x64xf32>, memref<?x?x?x?xf32> |
| }}}} |
| |
| // or equivalently (rewrite with vector.transpose) |
| for %i0 = 0 to %0 { |
| affine.for %i1 = 0 to %1 step 32 { |
| affine.for %i2 = 0 to %2 step 64 { |
| affine.for %i3 = 0 to %3 step 16 { |
| %val = `ssa-value` : vector<16x32x64xf32> |
| %valt = vector.transpose %val, [1, 2, 0] : |
| vector<16x32x64xf32> -> vector<32x64x16xf32> |
| vector.transfer_write %valt, %A[%i0, %i1, %i2, %i3] |
| {permutation_map: (d0, d1, d2, d3) -> (d1, d2, d3)} : |
| vector<32x64x16xf32>, memref<?x?x?x?xf32> |
| }}}} |
| |
| // write to a memref with vector element type. |
| vector.transfer_write %4, %arg1[%c3, %c3] |
| {permutation_map = (d0, d1)->(d0, d1)} |
| : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>> |
| |
| // return a tensor where the vector is inserted into the source tensor. |
| %5 = vector.transfer_write %4, %arg1[%c3, %c3] |
| {permutation_map = (d0, d1)->(d0, d1)} |
| : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>> |
| |
| // Special encoding for 0-d transfer with 0-d tensor/memref, vector shape |
| // {1} and permutation_map () -> (0). |
| %1 = vector.transfer_write %0, %arg0[] {permutation_map = affine_map<()->(0)>} : |
| vector<1xf32>, tensor<f32> |
| ``` |
| }]; |
| |
| let builders = [ |
| /// 1. Builder with type inference. |
| OpBuilder<(ins "Value":$vector, |
| "Value":$dest, |
| "ValueRange":$indices, |
| "AffineMapAttr":$permutationMapAttr, |
| "Value":$mask, |
| "ArrayAttr":$inBoundsAttr)>, |
| /// 2. Builder with type inference that sets an empty mask (variant with attrs). |
| OpBuilder<(ins "Value":$vector, |
| "Value":$dest, |
| "ValueRange":$indices, |
| "AffineMapAttr":$permutationMapAttr, |
| "ArrayAttr":$inBoundsAttr)>, |
| /// 3. Builder with type inference that sets an empty mask (variant without attrs). |
| OpBuilder<(ins "Value":$vector, |
| "Value":$dest, |
| "ValueRange":$indices, |
| "AffineMap":$permutationMap, |
| CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, |
| /// 4. Builder with type inference that sets an empty mask and sets permutation |
| /// map to 'getMinorIdentityMap'. |
| OpBuilder<(ins "Value":$vector, |
| "Value":$dest, |
| "ValueRange":$indices, |
| CArg<"std::optional<ArrayRef<bool>>", "::std::nullopt">:$inBounds)>, |
| ]; |
| |
| let extraClassDeclaration = [{ |
| /// This method is added to maintain uniformity with load/store |
| /// ops of other dialects. |
| Value getValue() { return getVector(); } |
| |
| MutableOperandRange getDpsInitsMutable() { return getBaseMutable(); } |
| }]; |
| |
| let hasFolder = 1; |
| let hasCanonicalizer = 1; |
| let hasCustomAssemblyFormat = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_LoadOp : Vector_Op<"load", [ |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]> { |
| let summary = "reads an n-D slice of memory into an n-D vector"; |
| let description = [{ |
| The 'vector.load' operation reads an n-D slice of memory into an n-D |
| vector. It takes a 'base' memref, an index for each memref dimension and a |
| result vector type as arguments. It returns a value of the result vector |
| type. The 'base' memref and indices determine the start memory address from |
| which to read. Each index provides an offset for each memref dimension |
| based on the element type of the memref. The shape of the result vector |
| type determines the shape of the slice read from the start memory address. |
| The elements along each dimension of the slice are strided by the memref |
| strides. When loading more than 1 element, only unit strides are allowed |
| along the most minor memref dimension. These constraints guarantee that |
| elements read along the first dimension of the slice are contiguous in |
| memory. |
| |
| The memref element type can be a scalar or a vector type. If the memref |
| element type is a scalar, it should match the element type of the result |
| vector. If the memref element type is vector, it should match the result |
| vector type. |
| |
| Example: 0-D vector load on a scalar memref. |
| ```mlir |
| %result = vector.load %base[%i, %j] : memref<100x100xf32>, vector<f32> |
| ``` |
| |
| Example: 1-D vector load on a scalar memref. |
| ```mlir |
| %result = vector.load %base[%i, %j] : memref<100x100xf32>, vector<8xf32> |
| ``` |
| |
| Example: 1-D vector load on a vector memref. |
| ```mlir |
| %result = vector.load %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32> |
| ``` |
| |
| Example: 2-D vector load on a scalar memref. |
| ```mlir |
| %result = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32> |
| ``` |
| |
| Example: 2-D vector load on a vector memref. |
| ```mlir |
| %result = vector.load %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32> |
| ``` |
| |
| Representation-wise, the 'vector.load' operation permits out-of-bounds |
| reads. Support and implementation of out-of-bounds vector loads is |
| target-specific. No assumptions should be made on the value of elements |
| loaded out of bounds. Not all targets may support out-of-bounds vector |
| loads. |
| |
| Example: Potential out-of-bound vector load. |
| ```mlir |
| %result = vector.load %memref[%index] : memref<?xf32>, vector<8xf32> |
| ``` |
| |
| Example: Explicit out-of-bound vector load. |
| ```mlir |
| %result = vector.load %memref[%c0] : memref<7xf32>, vector<8xf32> |
| ``` |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| load operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| }]; |
| |
| let arguments = (ins Arg<AnyMemRef, "the reference to load from", |
| [MemRead]>:$base, |
| Variadic<Index>:$indices, |
| DefaultValuedOptionalAttr<BoolAttr, "false">:$nontemporal, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment); |
| |
| let builders = [ |
| OpBuilder<(ins "VectorType":$resultType, |
| "Value":$base, |
| "ValueRange":$indices, |
| CArg<"bool", "false">:$nontemporal, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, resultType, base, indices, nontemporal, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]>, |
| OpBuilder<(ins "TypeRange":$resultTypes, |
| "Value":$base, |
| "ValueRange":$indices, |
| CArg<"bool", "false">:$nontemporal, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, resultTypes, base, indices, nontemporal, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| |
| let results = (outs AnyVectorOfAnyRank:$result); |
| |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { |
| return ::llvm::cast<MemRefType>(getBase().getType()); |
| } |
| |
| VectorType getVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| }]; |
| |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| |
| let assemblyFormat = |
| "$base `[` $indices `]` attr-dict `:` type($base) `,` type($result)"; |
| } |
| |
| def Vector_StoreOp : Vector_Op<"store", [ |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]> { |
| let summary = "writes an n-D vector to an n-D slice of memory"; |
| let description = [{ |
| The 'vector.store' operation writes an n-D vector to an n-D slice of memory. |
| It takes the vector value to be stored, a 'base' memref and an index for |
| each memref dimension. The 'base' memref and indices determine the start |
| memory address from which to write. Each index provides an offset for each |
| memref dimension based on the element type of the memref. The shape of the |
| vector value to store determines the shape of the slice written from the |
| start memory address. The elements along each dimension of the slice are |
| strided by the memref strides. When storing more than 1 element, only unit |
| strides are allowed along the most minor memref dimension. These constraints |
| guarantee that elements written along the first dimension of the slice are |
| contiguous in memory. |
| |
| The memref element type can be a scalar or a vector type. If the memref |
| element type is a scalar, it should match the element type of the value |
| to store. If the memref element type is vector, it should match the type |
| of the value to store. |
| |
| Example: 0-D vector store on a scalar memref. |
| ```mlir |
| vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<f32> |
| ``` |
| |
| Example: 1-D vector store on a scalar memref. |
| ```mlir |
| vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<8xf32> |
| ``` |
| |
| Example: 1-D vector store on a vector memref. |
| ```mlir |
| vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32> |
| ``` |
| |
| Example: 2-D vector store on a scalar memref. |
| ```mlir |
| vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32> |
| ``` |
| |
| Example: 2-D vector store on a vector memref. |
| ```mlir |
| vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32> |
| ``` |
| |
| Representation-wise, the 'vector.store' operation permits out-of-bounds |
| writes. Support and implementation of out-of-bounds vector stores are |
| target-specific. No assumptions should be made on the memory written out of |
| bounds. Not all targets may support out-of-bounds vector stores. |
| |
| Example: Potential out-of-bounds vector store. |
| ```mlir |
| vector.store %valueToStore, %memref[%index] : memref<?xf32>, vector<8xf32> |
| ``` |
| |
| Example: Explicit out-of-bounds vector store. |
| ```mlir |
| vector.store %valueToStore, %memref[%c0] : memref<7xf32>, vector<8xf32> |
| ``` |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| store operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| }]; |
| |
| let arguments = (ins |
| AnyVectorOfAnyRank:$valueToStore, |
| Arg<AnyMemRef, "the reference to store to", |
| [MemWrite]>:$base, |
| Variadic<Index>:$indices, |
| DefaultValuedOptionalAttr<BoolAttr, "false">:$nontemporal, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment); |
| |
| let builders = [ |
| OpBuilder<(ins "Value":$valueToStore, |
| "Value":$base, |
| "ValueRange":$indices, |
| CArg<"bool", "false">:$nontemporal, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, valueToStore, base, indices, nontemporal, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { |
| return ::llvm::cast<MemRefType>(getBase().getType()); |
| } |
| |
| VectorType getVectorType() { |
| return ::llvm::cast<VectorType>(getValueToStore().getType()); |
| } |
| }]; |
| |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| |
| let assemblyFormat = "$valueToStore `,` $base `[` $indices `]` attr-dict " |
| "`:` type($base) `,` type($valueToStore)"; |
| } |
| |
| def Vector_MaskedLoadOp : |
| Vector_Op<"maskedload", [ |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]>, |
| Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base, |
| Variadic<Index>:$indices, |
| VectorOfNonZeroRankOf<[I1]>:$mask, |
| AnyVectorOfNonZeroRank:$pass_thru, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment)>, |
| Results<(outs AnyVectorOfNonZeroRank:$result)> { |
| |
| let summary = "loads elements from memory into a vector as defined by a mask vector"; |
| |
| let description = [{ |
| The masked load reads elements from memory into a vector as defined |
| by a base with indices and a mask vector. When the mask is set, the |
| element is read from memory. Otherwise, the corresponding element is taken |
| from a pass-through vector. Informally the semantics are: |
| ``` |
| result[0] := if mask[0] then base[i + 0] else pass_thru[0] |
| result[1] := if mask[1] then base[i + 1] else pass_thru[1] |
| etc. |
| ``` |
| |
| If a mask bit is set and the corresponding index is out-of-bounds for the |
| given base, the behavior is undefined. If a mask bit is not set, the value |
| comes from the pass-through vector regardless of the index, and the index is |
| allowed to be out-of-bounds. |
| |
| The masked load can be used directly where applicable, or can be used |
| during progressively lowering to bring other memory operations closer to |
| hardware ISA support for a masked load. The semantics of the operation |
| closely correspond to those of the `llvm.masked.load` |
| [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics). |
| |
| Examples: |
| |
| ```mlir |
| %0 = vector.maskedload %base[%i], %mask, %pass_thru |
| : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32> |
| |
| %1 = vector.maskedload %base[%i, %j], %mask, %pass_thru |
| : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32> |
| ``` |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| load operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| }]; |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { |
| return ::llvm::cast<MemRefType>(getBase().getType()); |
| } |
| VectorType getMaskVectorType() { |
| return ::llvm::cast<VectorType>(getMask().getType()); |
| } |
| VectorType getPassThruVectorType() { |
| return ::llvm::cast<VectorType>(getPassThru().getType()); |
| } |
| VectorType getVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| }]; |
| let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` " |
| "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)"; |
| let hasCanonicalizer = 1; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| |
| let builders = [ |
| OpBuilder<(ins "VectorType":$resultType, |
| "Value":$base, |
| "ValueRange":$indices, |
| "Value":$mask, |
| "Value":$passthrough, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, resultType, base, indices, mask, passthrough, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]>, |
| OpBuilder<(ins "TypeRange":$resultTypes, |
| "Value":$base, |
| "ValueRange":$indices, |
| "Value":$mask, |
| "Value":$passthrough, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, resultTypes, base, indices, mask, passthrough, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| } |
| |
| def Vector_MaskedStoreOp : |
| Vector_Op<"maskedstore", [ |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]>, |
| Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base, |
| Variadic<Index>:$indices, |
| VectorOfNonZeroRankOf<[I1]>:$mask, |
| AnyVectorOfNonZeroRank:$valueToStore, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment)> { |
| |
| let summary = "stores elements from a vector into memory as defined by a mask vector"; |
| |
| let description = [{ |
| The masked store operation writes elements from a vector into memory |
| as defined by a base with indices and a mask vector. When the mask is |
| set, the corresponding element from the vector is written to memory. Otherwise, |
| no action is taken for the element. Informally the semantics are: |
| ``` |
| if (mask[0]) base[i+0] = value[0] |
| if (mask[1]) base[i+1] = value[1] |
| etc. |
| ``` |
| |
| If a mask bit is set and the corresponding index is out-of-bounds for the |
| given base, the behavior is undefined. If a mask bit is not set, no value |
| is stored regardless of the index, and the index is allowed to be |
| out-of-bounds. |
| |
| The masked store can be used directly where applicable, or can be used |
| during progressively lowering to bring other memory operations closer to |
| hardware ISA support for a masked store. The semantics of the operation |
| closely correspond to those of the `llvm.masked.store` |
| [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-store-intrinsics). |
| |
| Examples: |
| |
| ```mlir |
| vector.maskedstore %base[%i], %mask, %value |
| : memref<?xf32>, vector<8xi1>, vector<8xf32> |
| |
| vector.maskedstore %base[%i, %j], %mask, %value |
| : memref<?x?xf32>, vector<16xi1>, vector<16xf32> |
| ``` |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| store operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| }]; |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { |
| return ::llvm::cast<MemRefType>(getBase().getType()); |
| } |
| VectorType getMaskVectorType() { |
| return ::llvm::cast<VectorType>(getMask().getType()); |
| } |
| VectorType getVectorType() { |
| return ::llvm::cast<VectorType>(getValueToStore().getType()); |
| } |
| }]; |
| let assemblyFormat = |
| "$base `[` $indices `]` `,` $mask `,` $valueToStore " |
| "attr-dict `:` type($base) `,` type($mask) `,` type($valueToStore)"; |
| let hasCanonicalizer = 1; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| |
| let builders = [ |
| OpBuilder<(ins "Value":$base, |
| "ValueRange":$indices, |
| "Value":$mask, |
| "Value":$valueToStore, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, base, indices, mask, valueToStore, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| } |
| |
| def Vector_GatherOp : |
| Vector_Op<"gather", [ |
| DeclareOpInterfaceMethods<MaskableOpInterface>, |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]>, |
| Arguments<(ins Arg<TensorOrMemRef<[AnyType]>, "", [MemRead]>:$base, |
| Variadic<Index>:$offsets, |
| VectorOfNonZeroRankOf<[AnyInteger, Index]>:$indices, |
| VectorOfNonZeroRankOf<[I1]>:$mask, |
| AnyVectorOfNonZeroRank:$pass_thru, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment)>, |
| Results<(outs AnyVectorOfNonZeroRank:$result)> { |
| |
| let summary = [{ |
| Gathers elements from memory or ranked tensor into a vector as defined by an |
| index vector and a mask vector. |
| }]; |
| |
| let description = [{ |
| The gather operation returns an n-D vector whose elements are either loaded |
| from a k-D memref or tensor, or taken from an n-D pass-through vector, depending |
| on the values of an n-D mask vector. |
| |
| If a mask bit is set, the corresponding result element is taken from `base` |
| at an index defined by k indices and n-D `index_vec`. Otherwise, the element |
| is taken from the pass-through vector. As an example, suppose that `base` is |
| 3-D and the result is 2-D: |
| |
| ```mlir |
| func.func @gather_3D_to_2D( |
| %base: memref<?x10x?xf32>, %ofs_0: index, %ofs_1: index, %ofs_2: index, |
| %indices: vector<2x3xi32>, %mask: vector<2x3xi1>, |
| %fall_thru: vector<2x3xf32>) -> vector<2x3xf32> { |
| %result = vector.gather %base[%ofs_0, %ofs_1, %ofs_2] |
| [%indices], %mask, %fall_thru : [...] |
| return %result : vector<2x3xf32> |
| } |
| ``` |
| |
| The indexing semantics are then, |
| |
| ``` |
| result[i,j] := if mask[i,j] then base[i0, i1, i2 + indices[i,j]] |
| else pass_thru[i,j] |
| ``` |
| The index into `base` only varies in the innermost ((k-1)-th) dimension. |
| |
| If a mask bit is set and the corresponding index is out-of-bounds for the |
| given base, the behavior is undefined. If a mask bit is not set, the value |
| comes from the pass-through vector regardless of the index, and the index is |
| allowed to be out-of-bounds. |
| |
| The gather operation can be used directly where applicable, or can be used |
| during progressively lowering to bring other memory operations closer to |
| hardware ISA support for a gather. |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| gather operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| |
| Examples: |
| |
| ```mlir |
| // 1-D memref gathered to 2-D vector. |
| %0 = vector.gather %base[%c0][%v], %mask, %pass_thru |
| : memref<?xf32>, vector<2x16xi32>, vector<2x16xi1>, vector<2x16xf32> into vector<2x16xf32> |
| |
| // 2-D memref gathered to 1-D vector. |
| %1 = vector.gather %base[%i, %j][%v], %mask, %pass_thru |
| : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32> |
| ``` |
| }]; |
| |
| let extraClassDeclaration = [{ |
| ShapedType getBaseType() { return getBase().getType(); } |
| VectorType getIndexVectorType() { return getIndices().getType(); } |
| VectorType getMaskVectorType() { return getMask().getType(); } |
| VectorType getPassThruVectorType() { return getPassThru().getType(); } |
| VectorType getVectorType() { return getResult().getType(); } |
| }]; |
| |
| let assemblyFormat = |
| "$base `[` $offsets `]` `[` $indices `]` `,` " |
| "$mask `,` $pass_thru attr-dict `:` type($base) `,` " |
| "type($indices) `,` type($mask) `,` type($pass_thru) " |
| "`into` type($result)"; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| |
| let builders = [ |
| OpBuilder<(ins "VectorType":$resultType, |
| "Value":$base, |
| "ValueRange":$indices, |
| "Value":$index_vec, |
| "Value":$mask, |
| "Value":$passthrough, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, resultType, base, indices, index_vec, mask, passthrough, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| } |
| |
| def Vector_ScatterOp : |
| Vector_Op<"scatter", [ |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]>, |
| Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base, |
| Variadic<Index>:$offsets, |
| VectorOfNonZeroRankOf<[AnyInteger, Index]>:$indices, |
| VectorOfNonZeroRankOf<[I1]>:$mask, |
| AnyVectorOfNonZeroRank:$valueToStore, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment)> { |
| |
| let summary = [{ |
| scatters elements from a vector into memory as defined by an index vector |
| and a mask vector |
| }]; |
| |
| let description = [{ |
| The scatter operation stores elements from a n-D vector into memory as |
| defined by a base with indices and an additional n-D index vector, but |
| only if the corresponding bit in a n-D mask vector is set. Otherwise, no |
| action is taken for that element. Informally the semantics are: |
| ``` |
| if (mask[0]) base[index[0]] = value[0] |
| if (mask[1]) base[index[1]] = value[1] |
| etc. |
| ``` |
| |
| If a mask bit is set and the corresponding index is out-of-bounds for the |
| given base, the behavior is undefined. If a mask bit is not set, no value |
| is stored regardless of the index, and the index is allowed to be |
| out-of-bounds. |
| |
| If the index vector contains two or more duplicate indices, the behavior is |
| undefined. Underlying implementation may enforce strict sequential |
| semantics. |
| TODO: always enforce strict sequential semantics? |
| |
| The scatter operation can be used directly where applicable, or can be used |
| during progressively lowering to bring other memory operations closer to |
| hardware ISA support for a scatter. The semantics of the operation closely |
| correspond to those of the `llvm.masked.scatter` |
| [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-scatter-intrinsics). |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| scatter operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| |
| Examples: |
| |
| ```mlir |
| vector.scatter %base[%c0][%v], %mask, %value |
| : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> |
| |
| vector.scatter %base[%i, %j][%v], %mask, %value |
| : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> |
| ``` |
| }]; |
| |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { return getBase().getType(); } |
| VectorType getIndexVectorType() { return getIndices().getType(); } |
| VectorType getMaskVectorType() { return getMask().getType(); } |
| VectorType getVectorType() { return getValueToStore().getType(); } |
| }]; |
| |
| let assemblyFormat = |
| "$base `[` $offsets `]` `[` $indices `]` `,` " |
| "$mask `,` $valueToStore attr-dict `:` type($base) `,` " |
| "type($indices) `,` type($mask) `,` type($valueToStore)"; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| |
| let builders = [ |
| OpBuilder<(ins "Value":$base, |
| "ValueRange":$indices, |
| "Value":$index_vec, |
| "Value":$mask, |
| "Value":$valueToStore, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">: $alignment), [{ |
| return build($_builder, $_state, base, indices, index_vec, mask, valueToStore, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| } |
| |
| def Vector_ExpandLoadOp : |
| Vector_Op<"expandload", [ |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]>, |
| Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base, |
| Variadic<Index>:$indices, |
| FixedVectorOfNonZeroRankOf<[I1]>:$mask, |
| AnyVectorOfNonZeroRank:$pass_thru, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment)>, |
| Results<(outs AnyVectorOfNonZeroRank:$result)> { |
| |
| let summary = "reads elements from memory and spreads them into a vector as defined by a mask"; |
| |
| let description = [{ |
| The expand load reads elements from memory into a vector as defined by a |
| base with indices and a mask vector. Expansion only applies to the innermost |
| dimension. When the mask is set, the next element is read from memory. |
| Otherwise, the corresponding element is taken from a pass-through vector. |
| Informally the semantics are: |
| |
| ``` |
| index = i |
| result[0] := if mask[0] then base[index++] else pass_thru[0] |
| result[1] := if mask[1] then base[index++] else pass_thru[1] |
| etc. |
| ``` |
| |
| Note that the index increment is done conditionally. |
| |
| If a mask bit is set and the corresponding index is out-of-bounds for the |
| given base, the behavior is undefined. If a mask bit is not set, the value |
| comes from the pass-through vector regardless of the index, and the index is |
| allowed to be out-of-bounds. |
| |
| The expand load can be used directly where applicable, or can be used |
| during progressively lowering to bring other memory operations closer to |
| hardware ISA support for an expand. The semantics of the operation closely |
| correspond to those of the `llvm.masked.expandload` |
| [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-expandload-intrinsics). |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| load operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| |
| Note, at the moment this Op is only available for fixed-width vectors. |
| |
| Examples: |
| |
| ```mlir |
| %0 = vector.expandload %base[%i], %mask, %pass_thru |
| : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32> |
| |
| %1 = vector.expandload %base[%i, %j], %mask, %pass_thru |
| : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32> |
| ``` |
| }]; |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { |
| return ::llvm::cast<MemRefType>(getBase().getType()); |
| } |
| VectorType getMaskVectorType() { |
| return ::llvm::cast<VectorType>(getMask().getType()); |
| } |
| VectorType getPassThruVectorType() { |
| return ::llvm::cast<VectorType>(getPassThru().getType()); |
| } |
| VectorType getVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| }]; |
| let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` " |
| "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)"; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| |
| let builders = [ |
| OpBuilder<(ins "VectorType":$resultType, |
| "Value":$base, |
| "ValueRange":$indices, |
| "Value":$mask, |
| "Value":$passthrough, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, resultType, base, indices, mask, passthrough, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| } |
| |
| def Vector_CompressStoreOp : |
| Vector_Op<"compressstore", [ |
| DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>, |
| DeclareOpInterfaceMethods<AlignmentAttrOpInterface> |
| ]>, |
| Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base, |
| Variadic<Index>:$indices, |
| FixedVectorOfNonZeroRankOf<[I1]>:$mask, |
| AnyVectorOfNonZeroRank:$valueToStore, |
| OptionalAttr<IntValidAlignment<I64Attr>>: $alignment)> { |
| |
| let summary = "writes elements selectively from a vector as defined by a mask"; |
| |
| let description = [{ |
| The compress store operation writes elements from a vector into memory as |
| defined by a base with indices and a mask vector. Compression only applies |
| to the innermost dimension. When the mask is set, the corresponding element |
| from the vector is written next to memory. Otherwise, no action is taken |
| for the element. Informally the semantics are: |
| |
| ``` |
| index = i |
| if (mask[0]) base[index++] = value[0] |
| if (mask[1]) base[index++] = value[1] |
| etc. |
| ``` |
| |
| Note that the index increment is done conditionally. |
| |
| If a mask bit is set and the corresponding index is out-of-bounds for the |
| given base, the behavior is undefined. If a mask bit is not set, no value |
| is stored regardless of the index, and the index is allowed to be |
| out-of-bounds. |
| |
| The compress store can be used directly where applicable, or can be used |
| during progressively lowering to bring other memory operations closer to |
| hardware ISA support for a compress. The semantics of the operation closely |
| correspond to those of the `llvm.masked.compressstore` |
| [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-compressstore-intrinsics). |
| |
| An optional `alignment` attribute allows to specify the byte alignment of the |
| store operation. It must be a positive power of 2. The operation must access |
| memory at an address aligned to this boundary. Violating this requirement |
| triggers immediate undefined behavior. |
| |
| Note, at the moment this Op is only available for fixed-width vectors. |
| |
| Examples: |
| |
| ```mlir |
| vector.compressstore %base[%i], %mask, %value |
| : memref<?xf32>, vector<8xi1>, vector<8xf32> |
| |
| vector.compressstore %base[%i, %j], %mask, %value |
| : memref<?x?xf32>, vector<16xi1>, vector<16xf32> |
| ``` |
| }]; |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { |
| return ::llvm::cast<MemRefType>(getBase().getType()); |
| } |
| VectorType getMaskVectorType() { |
| return ::llvm::cast<VectorType>(getMask().getType()); |
| } |
| VectorType getVectorType() { |
| return ::llvm::cast<VectorType>(getValueToStore().getType()); |
| } |
| }]; |
| let assemblyFormat = |
| "$base `[` $indices `]` `,` $mask `,` $valueToStore attr-dict `:` " |
| "type($base) `,` type($mask) `,` type($valueToStore)"; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| let builders = [ |
| OpBuilder<(ins "Value":$base, |
| "ValueRange":$indices, |
| "Value":$mask, |
| "Value":$valueToStore, |
| CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), [{ |
| return build($_builder, $_state, base, indices, valueToStore, mask, |
| alignment.has_value() ? $_builder.getI64IntegerAttr(alignment->value()) : |
| nullptr); |
| }]> |
| ]; |
| } |
| |
| def Vector_ShapeCastOp : |
| Vector_Op<"shape_cast", [Pure, |
| DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]> |
| ]>, |
| Arguments<(ins AnyVectorOfAnyRank:$source)>, |
| Results<(outs AnyVectorOfAnyRank:$result)> { |
| let summary = "shape_cast casts between vector shapes"; |
| let description = [{ |
| Casts to a vector with the same number of elements, element type, and |
| number of scalable dimensions. |
| |
| It is currently assumed that this operation does not require moving data, |
| and that it will be folded away before lowering vector operations. |
| |
| There is an exception to the folding expectation when targeting |
| llvm.intr.matrix operations. We need a type conversion back and forth from a |
| 2-D MLIR vector to a 1-D flattened LLVM vector.shape_cast lowering to LLVM |
| is supported in that particular case, for now. |
| |
| Examples: |
| |
| ```mlir |
| %1 = vector.shape_cast %0 : vector<4x3xf32> to vector<3x2x2xf32> |
| |
| // with 2 scalable dimensions (number of which must be preserved). |
| %3 = vector.shape_cast %2 : vector<[2]x3x[4]xi8> to vector<3x[1]x[8]xi8> |
| ``` |
| }]; |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| }]; |
| let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)"; |
| let hasFolder = 1; |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_BitCastOp : |
| Vector_Op<"bitcast", [Pure, AllRanksMatch<["source", "result"]>]>, |
| Arguments<(ins AnyVectorOfAnyRank:$source)>, |
| Results<(outs AnyVectorOfAnyRank:$result)>{ |
| let summary = "bitcast casts between vectors"; |
| let description = [{ |
| The bitcast operation casts between vectors of the same rank, the minor 1-D |
| vector size is casted to a vector with a different element type but same |
| bitwidth. In case of 0-D vectors, the bitwidth of element types must be |
| equal. |
| |
| Example: |
| |
| ```mlir |
| // Example casting to a smaller element type. |
| %1 = vector.bitcast %0 : vector<5x1x4x3xf32> to vector<5x1x4x6xi16> |
| |
| // Example casting to a bigger element type. |
| %3 = vector.bitcast %2 : vector<10x12x8xi8> to vector<10x12x2xi32> |
| |
| // Example casting to an element type of the same size. |
| %5 = vector.bitcast %4 : vector<5x1x4x3xf32> to vector<5x1x4x3xi32> |
| |
| // Example casting of 0-D vectors. |
| %7 = vector.bitcast %6 : vector<f32> to vector<i32> |
| ``` |
| }]; |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| }]; |
| let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)"; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_TypeCastOp : |
| Vector_Op<"type_cast", [Pure, ViewLikeOpInterface]>, |
| Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>, |
| Results<(outs AnyMemRef:$result)> { |
| let summary = "type_cast op converts a scalar memref to a vector memref"; |
| let description = [{ |
| Performs a conversion from a memref with scalar element to a memref with a |
| *single* vector element, copying the shape of the memref to the vector. This |
| is the minimal viable operation that is required to makeke |
| super-vectorization operational. It can be seen as a special case of the |
| `view` operation but scoped in the super-vectorization context. |
| |
| Example: |
| |
| ```mlir |
| %A = memref.alloc() : memref<5x4x3xf32> |
| %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>> |
| ``` |
| }]; |
| |
| /// Build the canonical memRefType with a single vector. |
| /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>. |
| let builders = [OpBuilder<(ins "Value":$source)>]; |
| |
| let extraClassDeclaration = [{ |
| MemRefType getMemRefType() { |
| return ::llvm::cast<MemRefType>(getMemref().getType()); |
| } |
| MemRefType getResultMemRefType() { |
| return ::llvm::cast<MemRefType>(getResult().getType()); |
| } |
| // Implement ViewLikeOpInterface. |
| Value getViewSource() { return getMemref(); } |
| }]; |
| |
| let assemblyFormat = [{ |
| $memref attr-dict `:` type($memref) `to` type($result) |
| }]; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_ConstantMaskOp : |
| Vector_Op<"constant_mask", [Pure]>, |
| Arguments<(ins DenseI64ArrayAttr:$mask_dim_sizes)>, |
| Results<(outs VectorOfAnyRankOf<[I1]>)> { |
| let summary = "creates a constant vector mask"; |
| let description = [{ |
| Creates and returns a vector mask where elements of the result vector |
| are set to '0' or '1', based on whether the element indices are contained |
| within a hyper-rectangular region specified by the 'mask_dim_sizes' |
| array attribute argument. Each element of the 'mask_dim_sizes' array, |
| specifies an exclusive upper bound [0, mask-dim-size-element-value) |
| for a unique dimension in the vector result. The conjunction of the ranges |
| define a hyper-rectangular region within which elements values are set to 1 |
| (otherwise element values are set to 0). Each value of 'mask_dim_sizes' must |
| be non-negative and not greater than the size of the corresponding vector |
| dimension (as opposed to vector.create_mask which allows this). Sizes that |
| correspond to scalable dimensions are implicitly multiplied by vscale, |
| though currently only zero (none set) or the size of the dim/vscale |
| (all set) are supported. |
| |
| Example: |
| |
| ```mlir |
| // create a constant vector mask of size 4x3xi1 with elements in range |
| // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0). |
| %1 = vector.constant_mask [3, 2] : vector<4x3xi1> |
| |
| print %1 |
| columns |
| 0 1 2 |
| |------------ |
| 0 | 1 1 0 |
| rows 1 | 1 1 0 |
| 2 | 1 1 0 |
| 3 | 0 0 0 |
| ``` |
| }]; |
| |
| let builders = [ |
| // Build with mixed static/dynamic operands. |
| OpBuilder<(ins "VectorType":$type, "ConstantMaskKind":$kind)> |
| ]; |
| |
| let extraClassDeclaration = [{ |
| /// Return the result type of this op. |
| VectorType getVectorType() { |
| return cast<VectorType>(getOperation()->getResultTypes()[0]); |
| } |
| |
| /// Return whether the mask is a uniform vector of `1`s. |
| bool isAllOnesMask(); |
| }]; |
| |
| let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)"; |
| let hasVerifier = 1; |
| let hasFolder = 1; |
| } |
| |
| def Vector_CreateMaskOp : |
| Vector_Op<"create_mask", [Pure]>, |
| Arguments<(ins Variadic<Index>:$operands)>, |
| Results<(outs VectorOfAnyRankOf<[I1]>)> { |
| let summary = "creates a vector mask"; |
| let description = [{ |
| Creates and returns a vector mask where elements of the result vector |
| are set to '0' or '1', based on whether the element indices are contained |
| within a hyper-rectangular region specified by the operands. Specifically, |
| each operand specifies a range [0, operand-value) for a unique dimension in |
| the vector result. The conjunction of the operand ranges define a |
| hyper-rectangular region within which elements values are set to 1 |
| (otherwise element values are set to 0). If operand-value is negative, it is |
| treated as if it were zero, and if it is greater than the corresponding |
| dimension size, it is treated as if it were equal to the dimension size. |
| |
| Example: |
| |
| ```mlir |
| // create a vector mask of size 4x3xi1 where elements in range |
| // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0). |
| %1 = vector.create_mask %c3, %c2 : vector<4x3xi1> |
| |
| print %1 |
| columns |
| 0 1 2 |
| |------------ |
| 0 | 1 1 0 |
| rows 1 | 1 1 0 |
| 2 | 1 1 0 |
| 3 | 0 0 0 |
| ``` |
| }]; |
| |
| let builders = [ |
| // Build with mixed static/dynamic operands. |
| OpBuilder<(ins "VectorType":$type, "ArrayRef<OpFoldResult>":$mixedOperands)> |
| ]; |
| |
| let extraClassDeclaration = [{ |
| /// Return the result type of this op. |
| VectorType getVectorType() { |
| return cast<VectorType>(getOperation()->getResultTypes()[0]); |
| } |
| }]; |
| |
| let hasCanonicalizer = 1; |
| let hasVerifier = 1; |
| let assemblyFormat = "$operands attr-dict `:` type(results)"; |
| } |
| |
| def Vector_MaskOp : Vector_Op<"mask", [ |
| SingleBlockImplicitTerminator<"vector::YieldOp">, |
| DeclareOpInterfaceMethods<MaskingOpInterface>, |
| RecursiveMemoryEffects, NoRegionArguments |
| ]> { |
| let summary = "Predicates a maskable vector operation"; |
| let description = [{ |
| The `vector.mask` is a `MaskingOpInterface` operation that predicates the |
| execution of another operation. It takes an `i1` vector mask and an |
| optional passthru vector as arguments. |
| |
| A implicitly `vector.yield`-terminated region encloses the operation to be |
| masked. Values used within the region are captured from above. Only one |
| *maskable* operation can be masked with a `vector.mask` operation at a time. |
| An operation is *maskable* if it implements the `MaskableOpInterface`. The |
| terminator yields all results from the maskable operation to the result of |
| this operation. No other values are allowed to be yielded. |
| |
| An empty `vector.mask` operation is currently legal to enable optimizations |
| across the `vector.mask` region. However, this might change in the future |
| once vector transformations gain better support for `vector.mask`. |
| TODO: Consider making empty `vector.mask` illegal. |
| |
| The vector mask argument holds a bit for each vector lane and determines |
| which vector lanes should execute the maskable operation and which ones |
| should not. The `vector.mask` operation returns the value produced by the |
| masked execution of the nested operation, if any. The masked-off lanes in |
| the result vector are taken from the corresponding lanes of the pass-thru |
| argument, if provided, or left unmodified, otherwise. At this point, 0-D |
| vectors are not supported by `vector.mask`. They may be supported in the |
| future. |
| |
| The `vector.mask` operation does not prescribe how a maskable operation |
| should be masked or how a masked operation should be lowered. Masking |
| constraints and some semantic details are provided by each maskable |
| operation through the `MaskableOpInterface`. Lowering of masked operations |
| is implementation defined. For instance, scalarizing the masked operation |
| or executing the operation for the masked-off lanes are valid lowerings as |
| long as the execution of masked-off lanes does not change the observable |
| behavior of the program. |
| |
| Examples: |
| |
| ``` |
| %0 = vector.mask %mask { vector.reduction <add>, %a : vector<8xi32> into i32 } : vector<8xi1> -> i32 |
| ``` |
| |
| ``` |
| %0 = vector.mask %mask, %passthru { arith.divsi %a, %b : vector<8xi32> } : vector<8xi1> -> vector<8xi32> |
| ``` |
| |
| ``` |
| vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, memref<?xf32> } : vector<16xi1> |
| ``` |
| |
| ``` |
| vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, tensor<?xf32> } : vector<16xi1> -> tensor<?xf32> |
| ``` |
| }]; |
| |
| // TODO: Support multiple passthru values. |
| let arguments = (ins VectorOfNonZeroRankOf<[I1]>:$mask, |
| Optional<AnyType>:$passthru); |
| let results = (outs Variadic<AnyType>:$results); |
| let regions = (region SizedRegion<1>:$maskRegion); |
| |
| let skipDefaultBuilders = 1; |
| let builders = [ |
| OpBuilder<(ins "Value":$mask, "Operation *":$maskableOp, |
| CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)>, |
| OpBuilder<(ins "TypeRange":$resultTypes, "Value":$mask, "Operation *":$maskableOp, |
| CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)>, |
| OpBuilder<(ins "TypeRange":$resultTypes, "Value":$mask, "Value":$passthru, |
| "Operation *":$maskableOp, |
| CArg<"function_ref<void(OpBuilder &, Operation *)>">:$maskRegion)> |
| ]; |
| |
| let extraClassDeclaration = [{ |
| Block *getMaskBlock() { return &getMaskRegion().front(); } |
| |
| /// Returns true if mask op is not masking any operation. |
| bool isEmpty() { |
| Block *block = getMaskBlock(); |
| if (block->getOperations().size() > 1) |
| return false; |
| return true; |
| } |
| |
| static void ensureTerminator(Region ®ion, Builder &builder, |
| Location loc); |
| }]; |
| |
| let hasCanonicalizer = 1; |
| let hasFolder = 1; |
| let hasCustomAssemblyFormat = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_TransposeOp : |
| Vector_Op<"transpose", [Pure, |
| DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>, |
| PredOpTrait<"operand and result have same element type", |
| TCresVTEtIsSameAsOpBase<0, 0>>]> { |
| let summary = "vector transpose operation"; |
| let description = [{ |
| Takes a n-D vector and returns the transposed n-D vector defined by |
| the permutation of ranks in the n-sized integer array attribute (in case |
| of 0-D vectors the array attribute must be empty). |
| |
| In the operation |
| |
| ```mlir |
| %1 = vector.transpose %0, [i_1, .., i_n] |
| : vector<d_1 x .. x d_n x f32> |
| to vector<d_trans[0] x .. x d_trans[n-1] x f32> |
| ``` |
| |
| the `permutation` array [i_1, .., i_n] must be a permutation of [0, .., n-1]. |
| |
| Example: |
| |
| ```mlir |
| %1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32> |
| |
| [ [a, b, c], [ [a, d], |
| [d, e, f] ] -> [b, e], |
| [c, f] ] |
| ``` |
| }]; |
| |
| let arguments = (ins AnyVectorOfAnyRank:$vector, |
| DenseI64ArrayAttr:$permutation); |
| let results = (outs AnyVectorOfAnyRank:$result); |
| |
| let builders = [ |
| OpBuilder<(ins "Value":$vector, "ArrayRef<int64_t>":$permutation)> |
| ]; |
| let extraClassDeclaration = [{ |
| VectorType getSourceVectorType() { |
| return ::llvm::cast<VectorType>(getVector().getType()); |
| } |
| VectorType getResultVectorType() { |
| return ::llvm::cast<VectorType>(getResult().getType()); |
| } |
| }]; |
| let assemblyFormat = [{ |
| $vector `,` $permutation attr-dict `:` type($vector) `to` type($result) |
| }]; |
| let hasCanonicalizer = 1; |
| let hasFolder = 1; |
| let hasVerifier = 1; |
| } |
| |
| def Vector_PrintOp : |
| Vector_Op<"print", [ |
| MemoryEffects<[MemWrite]>, |
| PredOpTrait< |
| "`source` or `punctuation` are not set when printing strings", |
| CPred<"!getStringLiteral() || (!getSource() && getPunctuation() == PrintPunctuation::NewLine)"> |
| >, |
| ]>, |
| Arguments<(ins Optional<Type<Or<[ |
| AnyVectorOfAnyRank.predicate, |
| AnyInteger.predicate, Index.predicate, AnyFloat.predicate |
| ]>>>:$source, DefaultValuedAttr<Vector_PrintPunctuation, |
| "::mlir::vector::PrintPunctuation::NewLine">:$punctuation, |
| OptionalAttr<Builtin_StringAttr>:$stringLiteral) |
| > { |
| let summary = "print operation (for testing and debugging)"; |
| let description = [{ |
| Prints the source vector (or scalar) to stdout in a human-readable format |
| (for testing and debugging). No return value. |
| |
| Example: |
| |
| ```mlir |
| %v = arith.constant dense<0.0> : vector<4xf32> |
| vector.print %v : vector<4xf32> |
| ``` |
| |
| When lowered to LLVM, the vector print is decomposed into elementary |
| printing method calls that at runtime will yield: |
| |
| ``` |
| ( 0.0, 0.0, 0.0, 0.0 ) |
| ``` |
| |
| This is printed to stdout via a small runtime support library, which only |
| needs to provide a few printing methods (single value for all data |
| types, opening/closing bracket, comma, newline). |
| |
| By default `vector.print` adds a newline after the vector, but this can be |
| controlled by the `punctuation` attribute. For example, to print a comma |
| after instead do: |
| |
| ```mlir |
| vector.print %v : vector<4xf32> punctuation <comma> |
| ``` |
| |
| Note that it is possible to use the punctuation attribute alone. The |
| following will print a single newline: |
| |
| ```mlir |
| vector.print punctuation <newline> |
| ``` |
| |
| Additionally, to aid with debugging and testing `vector.print` can also |
| print constant strings: |
| |
| ```mlir |
| vector.print str "Hello, World!" |
| ``` |
| }]; |
| let extraClassDeclaration = [{ |
| Type getPrintType() { |
| return getSource().getType(); |
| } |
| }]; |
| let builders = [ |
| OpBuilder<(ins "PrintPunctuation":$punctuation), [{ |
| build($_builder, $_state, {}, punctuation, {}); |
| }]>, |
| OpBuilder<(ins "::mlir::Value":$source), [{ |
| build($_builder, $_state, source, PrintPunctuation::NewLine); |
| }]>, |
| OpBuilder<(ins "::mlir::Value":$source, "PrintPunctuation":$punctuation), [{ |
| build($_builder, $_state, source, punctuation, {}); |
| }]>, |
| OpBuilder<(ins "::llvm::StringRef":$string), [{ |
| build($_builder, $_state, {}, PrintPunctuation::NewLine, $_builder.getStringAttr(string)); |
| }]>, |
| ]; |
| |
| let assemblyFormat = [{ |
| ($source^ `:` type($source))? |
| oilist( |
| `str` $stringLiteral |
| | `punctuation` $punctuation) |
| attr-dict |
| }]; |
| } |
| |
| |
| //===----------------------------------------------------------------------===// |
| // VectorScaleOp |
| //===----------------------------------------------------------------------===// |
| |
| // TODO: In the future, we might want to have scalable vectors with different |
| // scales for different dimensions. E.g.: vector<[16]x[16]xf32>, in |
| // which case we might need to add an index to 'vscale' to select one |
| // of them. In order to support GPUs, we might also want to differentiate |
| // between a 'global' scale, a scale that's fixed throughout the |
| // execution, and a 'local' scale that is fixed but might vary with each |
| // call to the function. For that, it might be useful to have a |
| // 'vector.scale.global' and a 'vector.scale.local' operation. |
| def VectorScaleOp : Vector_Op<"vscale", |
| [Pure, DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>] |
| > { |
| let summary = "Load vector scale size"; |
| let description = [{ |
| The `vscale` op returns the scale of the scalable vectors, a positive |
| integer value that is constant at runtime but unknown at compile-time. |
| The scale of the vector indicates the multiplicity of the vectors and |
| vector operations. For example, a `vector<[4]xi32>` is equivalent to |
| `vscale` consecutive `vector<4xi32>`; and an operation on a |
| `vector<[4]xi32>` is equivalent to performing that operation `vscale` |
| times, once on each `<4xi32>` segment of the scalable vector. The `vscale` |
| op can be used to calculate the step in vector-length agnostic (VLA) loops. |
| Right now we only support one contiguous set of scalable dimensions, all of |
| them grouped and scaled with the value returned by 'vscale'. |
| }]; |
| let results = (outs Index:$res); |
| let assemblyFormat = "attr-dict"; |
| |
| let extraClassDefinition = [{ |
| void $cppClass::getAsmResultNames( |
| ::llvm::function_ref<void(mlir::Value, mlir::StringRef)> setNameFn) { |
| setNameFn(getResult(), "vscale"); |
| } |
| }]; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // VectorScanOp |
| //===----------------------------------------------------------------------===// |
| |
| def Vector_ScanOp : |
| Vector_Op<"scan", [Pure, |
| AllTypesMatch<["source", "dest"]>, |
| AllTypesMatch<["initial_value", "accumulated_value"]> ]>, |
| Arguments<(ins Vector_CombiningKindAttr:$kind, |
| AnyVectorOfNonZeroRank:$source, |
| AnyVectorOfAnyRank:$initial_value, |
| I64Attr:$reduction_dim, |
| BoolAttr:$inclusive)>, |
| Results<(outs AnyVectorOfNonZeroRank:$dest, |
| AnyVectorOfAnyRank:$accumulated_value)> { |
| let summary = "Scan operation"; |
| let description = [{ |
| Performs an inclusive/exclusive scan on an n-D vector along a single |
| dimension returning an n-D result vector using the given |
| operation (`add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for |
| integers, and `add`/`mul`/`minnumf`/`maxnumf`/`minimumf`/`maximumf` for |
| floats), and a specified value for the initial value. The operator returns |
| the result of scan as well as the result of the last reduction in the scan. |
| |
| Example: |
| |
| ```mlir |
| %1:2 = vector.scan <add>, %0, %acc {inclusive = false, reduction_dim = 1 : i64} : |
| vector<4x8x16x32xf32>, vector<4x16x32xf32> |
| ``` |
| }]; |
| |
| let extraClassDeclaration = [{ |
| VectorType getSourceType() { |
| return ::llvm::cast<VectorType>(getSource().getType()); |
| } |
| VectorType getDestType() { |
| return ::llvm::cast<VectorType>(getDest().getType()); |
| } |
| VectorType getAccumulatorType() { |
| return ::llvm::cast<VectorType>(getAccumulatedValue().getType()); |
| } |
| VectorType getInitialValueType() { |
| return ::llvm::cast<VectorType>(getInitialValue().getType()); |
| } |
| }]; |
| let assemblyFormat = |
| "$kind `,` $source `,` $initial_value attr-dict `:` " |
| "type($source) `,` type($initial_value) "; |
| let hasVerifier = 1; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // VectorStepOp |
| //===----------------------------------------------------------------------===// |
| |
| def Vector_StepOp : Vector_Op<"step", [ |
| Pure, |
| DeclareOpInterfaceMethods<VectorUnrollOpInterface>, |
| DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]> |
| ]> { |
| let summary = "A linear sequence of values from 0 to N"; |
| let description = [{ |
| A `step` operation produces an index vector, i.e. a 1-D vector of values of |
| index type that represents a linear sequence from 0 to N-1, where N is the |
| number of elements in the `result` vector. |
| |
| Supports fixed-width and scalable vectors. |
| |
| Examples: |
| |
| ```mlir |
| %0 = vector.step : vector<4xindex> ; [0, 1, 2, 3] |
| %1 = vector.step : vector<[4]xindex> ; [0, 1, .., <vscale * 4 - 1>] |
| ``` |
| }]; |
| let results = (outs VectorOfRankAndType<[1], [Index]>:$result); |
| let assemblyFormat = "attr-dict `:` type($result)"; |
| let hasCanonicalizer = 1; |
| } |
| |
| def Vector_YieldOp : Vector_Op<"yield", [ |
| Pure, ReturnLike, Terminator]> { |
| let summary = "Terminates and yields values from vector regions."; |
| let description = [{ |
| "vector.yield" yields an SSA value from the Vector dialect op region and |
| terminates the regions. The semantics of how the values are yielded is |
| defined by the parent operation. |
| If "vector.yield" has any operands, the operands must correspond to the |
| parent operation's results. |
| If the parent operation defines no value the vector.yield may be omitted |
| when printing the region. |
| }]; |
| |
| let arguments = (ins Variadic<AnyType>:$operands); |
| |
| let builders = [ |
| OpBuilder<(ins), [{ /* nothing to do */ }]>, |
| ]; |
| |
| let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; |
| } |
| |
| |
| #endif // MLIR_DIALECT_VECTOR_IR_VECTOR_OPS |