mlir/include/mlir/Dialect/Vector/VectorOps.td - llvm-project - Git at Google

 //===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // Defines MLIR vector operations.
 //
 //===----------------------------------------------------------------------===//

 #ifndef VECTOR_OPS
 #define VECTOR_OPS

 include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
 include "mlir/Interfaces/SideEffects.td"

 def Vector_Dialect : Dialect {
   let name = "vector";
   let cppNamespace = "vector";
   let hasConstantMaterializer = 1;
 }

 // Base class for Vector dialect ops.
 class Vector_Op<string mnemonic, list<OpTrait> traits = []> :
     Op<Vector_Dialect, mnemonic, traits> {
   // For every vector op, there needs to be a:
   //   * void print(OpAsmPrinter &p, ${C++ class of Op} op)
   //   * LogicalResult verify(${C++ class of Op} op)
   //   * ParseResult parse${C++ class of Op}(OpAsmParser &parser,
   //                                         OperationState &result)
   // functions.
   let printer = [{ return ::print(p, *this); }];
   let verifier = [{ return ::verify(*this); }];
   let parser = [{ return ::parse$cppClass(parser, result); }];
 }

 // TODO(andydavis, ntv) Add an attribute to specify a different algebra
 // with operators other than the current set: {*, +}.
 def Vector_ContractionOp :
   Vector_Op<"contract", [NoSideEffect,
      PredOpTrait<"first operand lhs and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>,
      PredOpTrait<"second operand rhs and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 1>>,
      PredOpTrait<"third operand acc and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 1>>]>,
     Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyType:$acc,
                Variadic<VectorOf<[I1]>>:$masks,
                AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types)>,
     Results<(outs AnyType)> {
   let summary = "vector contraction operation";
   let description = [{
     Computes the sum of products of vector elements along contracting
     dimension pairs from 2 vectors of rank M and N respectively, adds this
     intermediate result to the accumulator argument of rank K, and returns a
     vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims +
     num_batch_dims (see dimension type descriptions below)). For K = 0 (no
     free or batch dimensions), the accumulator and output are a scalar.

     Optional vector mask arguments (produced by CreateMaskOp or ConstantMaskOp)
     specify the dynamic dimension sizes of valid data within the lhs/rhs vector
     arguments.

     An iterator type attribute list must be specified, where each element of
     the list represents an iterator with one of the following types:

     *) "reduction": reduction dimensions are present in the lhs and rhs
                     arguments but not in the output (and accumulator
                     argument). These are the dimensions along which the vector
                     contraction op computes the sum of products, and
                     contracting dimension pair dimension sizes must match
                     between lhs/rhs.
     *) "parallel": Batch dimensions are iterator type "parallel", and
                    are non-contracting dimensions present in the lhs, rhs and
                    output. The lhs/rhs co-iterate along the batch dimensions,
                    which should be expressed in their indexing maps.

                    Free dimensions are iterator type "parallel", and are
                    non-contraction, non-batch dimensions accessed by either the
                    lhs or rhs (but not both). The lhs and rhs free dimensions
                    are unrelated to each other and do not co-iterate, which
                    should be expressed in their indexing maps.

     An indexing map attribute list must be specified with an entry for lhs, rhs
     and acc arguments. An indexing map attribute specifies a mapping from each
     iterator in the iterator type list, to each dimension of an N-D vector.

     Example:

     ```mlir
     // Simple dot product (K = 0).
     #contraction_accesses = [
      affine_map<(i) -> (i)>,
      affine_map<(i) -> (i)>,
      affine_map<(i) -> ()>
     ]
     #contraction_trait = {
       indexing_maps = #contraction_accesses,
       iterator_types = ["reduction"]
     }
     %3 = vector.contract #contraction_trait %0, %1, %2
       : vector<10xf32>, vector<10xf32> into f32

     // 2D vector contraction with one contracting dimension (matmul, K = 2).
     #contraction_accesses = [
       affine_map<(i, j, k) -> (i, k)>,
       affine_map<(i, j, k) -> (k, j)>,
       affine_map<(i, j, k) -> (i, j)>
     ]
     #contraction_trait = {
       indexing_maps = #contraction_accesses,
       iterator_types = ["parallel", "parallel", "reduction"]
     }

     %3 = vector.contract #contraction_trait %0, %1, %2
       : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>

     // 4D to 3D vector contraction with two contracting dimensions and
     // one batch dimension (K = 3).
     #contraction_accesses = [
       affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
       affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
       affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
     ]
     #contraction_trait = {
       indexing_maps = #contraction_accesses,
       iterator_types = ["parallel", "parallel", "parallel",
                         "reduction", "reduction"]
     }

     %4 = vector.contract #contraction_trait %0, %1, %2
         : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>

     // 4D vector contraction with two contracting dimensions and optional
     // vector mask arguments.
     %lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1>
     %rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1>

     %5 = vector.contract #contraction_trait %0, %1, %2, %lhs_mask, %rhs_mask
        : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32>
     ```
   }];
   let builders = [OpBuilder<
     "Builder *builder, OperationState &result, Value lhs, Value rhs, "
     "Value acc, ArrayAttr indexingMaps, ArrayAttr iteratorTypes">,
     OpBuilder<
       "Builder *builder, OperationState &result, Value lhs, Value rhs, "
       "Value acc, ArrayRef<ArrayRef<AffineExpr>> indexingExprs, "
       "ArrayRef<StringRef> iteratorTypes">];
   let extraClassDeclaration = [{
     VectorType getLhsType() {
       return lhs().getType().cast<VectorType>();
     }
     VectorType getRhsType() {
       return rhs().getType().cast<VectorType>();
     }
     Type getAccType() { return acc().getType(); }
     VectorType getLHSVectorMaskType() {
       if (llvm::size(masks()) != 2) return VectorType();
       return getOperand(3).getType().cast<VectorType>();
     }
     VectorType getRHSVectorMaskType() {
       if (llvm::size(masks()) != 2) return VectorType();
       return getOperand(4).getType().cast<VectorType>();
     }
     Type getResultType() { return getResult().getType(); }
     ArrayRef<StringRef> getTraitAttrNames();
     SmallVector<AffineMap, 4> getIndexingMaps();
     static unsigned getAccOperandIndex() { return 2; }

     // Returns the bounds of each dimension in the iteration space spanned
     // by the iterator types of this operation.
     void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds);

     // Returns a list of index maps, where there is a list entry for each
     // op indexing map attribute (i.e. one for each input and output, with
     // the output listed last). Each index map, maps from this operations
     // iteration space, to vector dimensions of the maps input/output.
     void getIterationIndexMap(
       std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap);

     std::vector<std::pair<int64_t, int64_t>> getContractingDimMap();
     std::vector<std::pair<int64_t, int64_t>> getBatchDimMap();
   }];
 }

 def Vector_ReductionOp :
   Vector_Op<"reduction", [NoSideEffect,
      PredOpTrait<"source operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>]>,
     Arguments<(ins StrAttr:$kind, AnyVector:$vector, Variadic<AnyType>:$acc)>,
     Results<(outs AnyType:$dest)> {
   let summary = "reduction operation";
   let description = [{
     Reduces an 1-D vector "horizontally" into a scalar using the given
     operation (add/mul/min/max for int/fp and and/or/xor for int only).
     Some reductions (add/mul for fp) also allow an optional fused
     accumulator.

     Note that these operations are restricted to 1-D vectors to remain
     close to the corresponding LLVM intrinsics:

     http://llvm.org/docs/LangRef.html#experimental-vector-reduction-intrinsics

     Example:

     ```mlir
     %1 = vector.reduction "add", %0 : vector<16xf32> into f32

     %3 = vector.reduction "xor", %2 : vector<4xi32> into i32

     %4 = vector.reduction "mul", %0, %1 : vector<16xf32> into f32
     ```
   }];
   let extraClassDeclaration = [{
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
   }];
 }

 def Vector_BroadcastOp :
   Vector_Op<"broadcast", [NoSideEffect,
      PredOpTrait<"source operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>]>,
     Arguments<(ins AnyType:$source)>,
     Results<(outs AnyVector:$vector)> {
   let summary = "broadcast operation";
   let description = [{
     Broadcasts the scalar or k-D vector value in the source operand
     to a n-D result vector such that the broadcast makes sense, i.e.,
     the source operand is duplicated to match the given rank and sizes
     in the result vector. The legality rules are:
     * the source operand must have the same element type as the result type
     * a k-D vector <s_1 x .. x s_k x type> can be broadcast to
       a n-D vector <t_1 x .. x t_n x type> if
        * k <= n, and
        * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n
           match exactly as s_j = t_i or s_j = 1:
        ```
            t_1 x   ..  t_n-k x t_n-k+1 x .. x t_i x .. x t_n
                                s_1     x .. x s_j x .. x s_k
                <duplication>         <potential stretch>
        ```
     The source operand is duplicated over all the missing leading dimensions
     and stretched over the trailing dimensions where the source has a non-equal
     dimension of 1. These rules imply that any scalar broadcast (k=0) to any
     shaped vector with the same element type is always legal.

     Example:

     ```mlir
     %0 = constant 0.0 : f32
     %1 = vector.broadcast %0 : f32 to vector<16xf32>
     %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32>
     ```
   }];
   let extraClassDeclaration = [{
     Type getSourceType() { return source().getType(); }
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
   }];
   let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)";
 }

 def Vector_ShuffleOp :
   Vector_Op<"shuffle", [NoSideEffect,
      PredOpTrait<"first operand v1 and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>,
      PredOpTrait<"second operand v2 and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 1>>]>,
      Arguments<(ins AnyVector:$v1, AnyVector:$v2, I64ArrayAttr:$mask)>,
      Results<(outs AnyVector:$vector)> {
   let summary = "shuffle operation";
   let description = [{
     The shuffle operation constructs a permutation (or duplication) of elements
     from two input vectors, returning a vector with the same element type as
     the input and a length that is the same as the shuffle mask. The two input
     vectors must have the same element type, rank, and trailing dimension sizes
     and shuffles their values in the leading dimension (which may differ in size)
     according to the given mask. The legality rules are:
     * the two operands must have the same element type as the result
     * the two operands and the result must have the same rank and trailing
       dimension sizes, viz. given two k-D operands
               v1 : <s_1 x s_2 x .. x s_k x type> and
               v2 : <t_1 x t_2 x .. x t_k x type>
       we have s_i = t_i for all 1 < i <= k
     * the mask length equals the leading dimension size of the result
     * numbering the input vector indices left to right across the operands, all
       mask values must be within range, viz. given two k-D operands v1 and v2
       above, all mask values are in the range [0,s_1+t_1)

     Example:

     ```mlir
     %0 = vector.shuffle %a, %b[0, 3]
                : vector<2xf32>, vector<2xf32>       ; yields vector<2xf32>
     %1 = vector.shuffle %c, %b[0, 1, 2]
                : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32>
     %2 = vector.shuffle %a, %b[3, 2, 1, 0]
                : vector<2xf32>, vector<2xf32>       ; yields vector<4xf32>
     ```
   }];
   let builders = [OpBuilder<"Builder *builder, OperationState &result,"
                             "Value v1, Value v2, ArrayRef<int64_t>">];
   let extraClassDeclaration = [{
     static StringRef getMaskAttrName() { return "mask"; }
     VectorType getV1VectorType() {
       return v1().getType().cast<VectorType>();
     }
     VectorType getV2VectorType() {
       return v2().getType().cast<VectorType>();
     }
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
   }];
 }

 def Vector_ExtractElementOp :
   Vector_Op<"extractelement", [NoSideEffect,
      TypesMatchWith<"result type matches element type of vector operand",
                     "vector", "result",
                     "$_self.cast<ShapedType>().getElementType()">]>,
     Arguments<(ins AnyVector:$vector, AnySignlessInteger:$position)>,
     Results<(outs AnyType:$result)> {
   let summary = "extractelement operation";
   let description = [{
     Takes an 1-D vector and a dynamic index position and extracts the
     scalar at that position. Note that this instruction resembles
     vector.extract, but is restricted to 1-D vectors and relaxed
     to dynamic indices. It is meant to be closer to LLVM's version:
     https://llvm.org/docs/LangRef.html#extractelement-instruction

     Example:

     ```mlir
     %c = constant 15 : i32
     %1 = vector.extractelement %0[%c : i32]: vector<16xf32>
     ```
   }];
   let extraClassDeclaration = [{
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
   }];

   let assemblyFormat = [{
     $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector)
   }];
 }

 def Vector_ExtractOp :
   Vector_Op<"extract", [NoSideEffect,
      PredOpTrait<"operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>]>,
     Arguments<(ins AnyVector:$vector, I64ArrayAttr:$position)>,
     Results<(outs AnyType)> {
   let summary = "extract operation";
   let description = [{
     Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at
     the proper position. Degenerates to an element type in the 0-D case.

     Example:

     ```mlir
     %1 = vector.extract %0[3]: vector<4x8x16xf32>
     %2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32>
     ```
   }];
   let builders = [OpBuilder<
     "Builder *builder, OperationState &result, Value source,"
     "ArrayRef<int64_t>">];
   let extraClassDeclaration = [{
     static StringRef getPositionAttrName() { return "position"; }
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
   }];
 }

 def Vector_ExtractSlicesOp :
   Vector_Op<"extract_slices", [NoSideEffect]>,
     Arguments<(ins AnyVector:$vector, I64ArrayAttr:$sizes,
                    I64ArrayAttr:$strides)>,
     Results<(outs TupleOf<[AnyVector]>)> {
   let summary = "vector extract slices operation";
   let description = [{
     Takes an N-d vector and returns a tuple of vector slices of 'vector',
     based on 'sizes' and 'strides' parameters.

     The arguments 'sizes' and 'strides' represent a specification for
     generating the unrolling of 'vector' shape, which has all slices of shape
     'sizes' except for slices at dimension boundaries when 'vector' dimension
     sizes are not a multiple of 'sizes'.

     Each slice is returned at the tuple element index corresponding to the
     linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
     Currently, only unit strides are supported.

     Example:

     ```mlir
     %0 = vector.transfer_read ...: vector<4x2xf32>

     %1 = vector.extract_slices %0, [2, 2], [1, 1]
       : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>

     // Example with partial slices at dimension boundaries.
     %2 = vector.transfer_read ...: vector<4x3xf32>

     %3 = vector.extract_slices %2, [2, 2], [1, 1]
       : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
                                    vector<2x2xf32>, vector<2x1xf32>>
     ```
   }];
   let builders = [OpBuilder<
     "Builder *builder, OperationState &result, TupleType tupleType, " #
     "Value vector, ArrayRef<int64_t> sizes, " #
     "ArrayRef<int64_t> strides">];
   let extraClassDeclaration = [{
     VectorType getSourceVectorType() {
       return vector().getType().cast<VectorType>();
     }
     TupleType getResultTupleType() {
       return getResult().getType().cast<TupleType>();
     }
     void getSizes(SmallVectorImpl<int64_t> &results);
     void getStrides(SmallVectorImpl<int64_t> &results);
     static StringRef getSizesAttrName() { return "sizes"; }
     static StringRef getStridesAttrName() { return "strides"; }
   }];
   let assemblyFormat = [{
     $vector `,` $sizes `,` $strides attr-dict `:` type($vector) `into`
     type(results)
   }];
 }

 def Vector_FMAOp :
   Op<Vector_Dialect, "fma", [NoSideEffect,
                              AllTypesMatch<["lhs", "rhs", "acc", "result"]>]>,
     Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyVector:$acc)>,
     Results<(outs AnyVector:$result)> {
   let summary = "vector fused multiply-add";
   let description = [{
     Multiply-add expressions operate on n-D vectors and compute a fused
     pointwise multiply-and-accumulate: `$result = `$lhs * $rhs + $acc`.
     All operands and result have the same vector type. The semantics
     of the operation correspond to those of the `llvm.fma`
     [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the
     particular case of lowering to LLVM, this is guaranteed to lower
     to the `llvm.fma.*` intrinsic.

     Example:

     ```mlir
     %3 = vector.fma %0, %1, %2: vector<8x16xf32>
     ```
   }];
   // Fully specified by traits.
   let verifier = ?;
   let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)";
   let builders = [OpBuilder<
     "Builder *b, OperationState &result, Value lhs, Value rhs, Value acc",
     "build(b, result, lhs.getType(), lhs, rhs, acc);">];
   let extraClassDeclaration = [{
     VectorType getVectorType() { return lhs().getType().cast<VectorType>(); }
   }];
 }

 def Vector_InsertElementOp :
   Vector_Op<"insertelement", [NoSideEffect,
      TypesMatchWith<"source operand type matches element type of result",
                     "result", "source",
                     "$_self.cast<ShapedType>().getElementType()">,
      AllTypesMatch<["dest", "result"]>]>,
      Arguments<(ins AnyType:$source, AnyVector:$dest,
                     AnySignlessInteger:$position)>,
      Results<(outs AnyVector:$result)> {
   let summary = "insertelement operation";
   let description = [{
     Takes a scalar source, an 1-D destination vector and a dynamic index
     position and inserts the source into the destination at the proper
     position.  Note that this instruction resembles vector.insert, but
     is restricted to 1-D vectors and relaxed to dynamic indices. It is
     meant to be closer to LLVM's version:
     https://llvm.org/docs/LangRef.html#insertelement-instruction

     Example:

     ```mlir
     %c = constant 15 : i32
     %f = constant 0.0f : f32
     %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
     ```
   }];
   let extraClassDeclaration = [{
     Type getSourceType() { return source().getType(); }
     VectorType getDestVectorType() {
       return dest().getType().cast<VectorType>();
     }
   }];

   let assemblyFormat = [{
     $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:`
     type($result)
   }];
 }

 def Vector_InsertOp :
   Vector_Op<"insert", [NoSideEffect,
      PredOpTrait<"source operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>,
      AllTypesMatch<["dest", "res"]>]>,
      Arguments<(ins AnyType:$source, AnyVector:$dest, I64ArrayAttr:$position)>,
      Results<(outs AnyVector:$res)> {
   let summary = "insert operation";
   let description = [{
     Takes an n-D source vector, an (n+k)-D destination vector and a k-D position
     and inserts the n-D source into the (n+k)-D destination at the proper
     position. Degenerates to a scalar source type when n = 0.

     Example:

     ```mlir
     %2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32>
     %5 = vector.insert %3, %4[3, 3, 3] : f32 into vector<4x8x16xf32>
     ```
   }];
   let assemblyFormat = [{
     $source `,` $dest $position attr-dict `:` type($source) `into` type($dest)
   }];

   let builders = [OpBuilder<
     "Builder *builder, OperationState &result, Value source, " #
     "Value dest, ArrayRef<int64_t>">];
   let extraClassDeclaration = [{
     static StringRef getPositionAttrName() { return "position"; }
     Type getSourceType() { return source().getType(); }
     VectorType getDestVectorType() {
       return dest().getType().cast<VectorType>();
     }
   }];
 }

 def Vector_InsertSlicesOp :
   Vector_Op<"insert_slices", [NoSideEffect]>,
     Arguments<(ins TupleOf<[AnyVector]>:$vectors, I64ArrayAttr:$sizes,
                    I64ArrayAttr:$strides)>,
     Results<(outs AnyVector)> {
   let summary = "vector insert slices operation";
   let description = [{
     Takes a tuple of vector slices and inserts them into the vector result
     according to the 'sizes' and 'strides' parameters.

     The arguments 'sizes' and 'strides' represent a specification for
     generating the unrolling of 'vector' shape, which has all slices of shape
     'sizes' except for slices at dimension boundaries when 'vector' dimension
     sizes are not a multiple of 'sizes'.

     Each slice in 'vectors' is at the tuple element index corresponding to the
     linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
     Currently, only unit strides are supported.

     Example:

     ```mlir
     %0 = vector.extract_slices %0, [2, 2], [1, 1]
       : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>

     %1 = vector.insert_slices %0, [2, 2], [1, 1]
       : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<4x2xf32>

     // Example with partial slices at dimension boundaries.
     %3 = vector.extract_slices %2, [2, 2], [1, 1]
       : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
                                    vector<2x2xf32>, vector<2x1xf32>>

     %4 = vector.insert_slices %3, [2, 2], [1, 1]
       : tuple<vector<2x2xf32>, vector<2x1xf32>,
               vector<2x2xf32>, vector<2x1xf32>> into vector<4x3xf32>
     ```
   }];

   let extraClassDeclaration = [{
     TupleType getSourceTupleType() {
       return vectors().getType().cast<TupleType>();
     }
     VectorType getResultVectorType() {
       return getResult().getType().cast<VectorType>();
     }
     void getSizes(SmallVectorImpl<int64_t> &results);
     void getStrides(SmallVectorImpl<int64_t> &results);
     static StringRef getSizesAttrName() { return "sizes"; }
     static StringRef getStridesAttrName() { return "strides"; }
   }];
   let assemblyFormat = [{
     $vectors `,` $sizes `,` $strides attr-dict `:` type($vectors) `into`
     type(results)
   }];
 }

 def Vector_InsertStridedSliceOp :
   Vector_Op<"insert_strided_slice", [NoSideEffect,
     PredOpTrait<"operand #0 and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>,
     AllTypesMatch<["dest", "res"]>]>,
     Arguments<(ins AnyVector:$source, AnyVector:$dest, I64ArrayAttr:$offsets,
                I64ArrayAttr:$strides)>,
     Results<(outs AnyVector:$res)> {
   let summary = "strided_slice operation";
   let description = [{
     Takes a k-D source vector, an n-D destination vector (n >= k), n-sized
     `offsets` integer array attribute, a k-sized `strides` integer array attribute
     and inserts the k-D source vector as a strided subvector at the proper offset
     into the n-D destination vector.

     At the moment strides must contain only 1s.

     Returns an n-D vector that is a copy of the n-D destination vector in which
     the last k-D dimensions contain the k-D source vector elements strided at
     the proper location as specified by the offsets.

     Example:

     ```mlir
     %2 = vector.insert_strided_slice %0, %1
         {offsets = [0, 0, 2], strides = [1, 1]}:
       vector<2x4xf32> into vector<16x4x8xf32>
     ```
   }];

   let assemblyFormat = [{
     $source `,` $dest attr-dict `:` type($source) `into` type($dest)
   }];

   let builders = [OpBuilder<
     "Builder *builder, OperationState &result, Value source, Value dest, " #
     "ArrayRef<int64_t> offsets, ArrayRef<int64_t> strides">];
   let extraClassDeclaration = [{
     static StringRef getOffsetsAttrName() { return "offsets"; }
     static StringRef getStridesAttrName() { return "strides"; }
     VectorType getSourceVectorType() {
       return source().getType().cast<VectorType>();
     }
     VectorType getDestVectorType() {
       return dest().getType().cast<VectorType>();
     }
   }];
 }

 def Vector_OuterProductOp :
   Vector_Op<"outerproduct", [NoSideEffect, SameOperandsAndResultElementType]>,
     Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, Variadic<AnyVector>:$acc)>,
     Results<(outs AnyVector)> {
   let summary = "vector outerproduct with optional fused add";
   let description = [{
     Takes 2 1-D vectors and returns the 2-D vector containing the outer-product.

     An optional extra 2-D vector argument may be specified in which case the
     operation returns the sum of the outer-product and the extra vector. In this
     multiply-accumulate scenario, the rounding mode is that obtained by
     guaranteeing that a fused-multiply add operation is emitted. When lowered to
     the LLVMIR dialect, this form emits `llvm.intr.fma`, which is guaranteed to
     lower to actual `fma` instructions on x86.

     Example:

     ```
     %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32>
     return %2: vector<4x8xf32>

     %3 = vector.outerproduct %0, %1, %2:
       vector<4xf32>, vector<8xf32>, vector<4x8xf32>
     return %3: vector<4x8xf32>
     ```
   }];
   let extraClassDeclaration = [{
     VectorType getOperandVectorTypeLHS() {
       return lhs().getType().cast<VectorType>();
     }
     VectorType getOperandVectorTypeRHS() {
       return rhs().getType().cast<VectorType>();
     }
     VectorType getOperandVectorTypeACC() {
       return (llvm::size(acc()) == 0) ? VectorType() :
         (*acc().begin()).getType().cast<VectorType>();
     }
     VectorType getVectorType() {
       return getResult().getType().cast<VectorType>();
     }
   }];
 }

 // TODO(andydavis) Add transformation which decomposes ReshapeOp into an
 // optimized sequence of vector rotate/shuffle/select operations.
 def Vector_ReshapeOp :
   Vector_Op<"reshape", [AttrSizedOperandSegments, NoSideEffect]>,
     Arguments<(ins AnyVector:$vector, Variadic<Index>:$input_shape,
                Variadic<Index>:$output_shape,
                I64ArrayAttr:$fixed_vector_sizes)>,
     Results<(outs AnyVector:$result)> {
   let summary = "vector reshape operation";
   let description = [{
     Reshapes its vector operand from 'input_shape' to 'output_shape' maintaining
     fixed vector dimension 'fixed_vector_sizes' on the innermost vector
     dimensions.

     The parameters 'input_shape' and 'output_shape' represent valid data shapes
     across fixed vector shapes. For example, if a vector has a valid data
     shape [6] with fixed vector size [8], then the valid data elements are
     assumed to be stored at the beginning of the vector with the remaining
     vector elements undefined.

     In the examples below, valid data elements are represented by an alphabetic
     character, and undefined data elements are represented by '-'.

     Example

       vector<1x8xf32> with valid data shape [6], fixed vector sizes [8]

                 input: [a, b, c, d, e, f]

            layout map: (d0) -> (d0 floordiv 8, d0 mod 8)

         vector layout: [a, b, c, d, e, f, -, -]

     Example

       vector<2x8xf32> with valid data shape [10], fixed vector sizes [8]

                 input: [a, b, c, d, e, f, g, h, i, j]

            layout map: (d0) -> (d0 floordiv 8, d0 mod 8)

         vector layout: [[a, b, c, d, e, f, g, h],
                         [i, j, -, -, -, -, -, -]]

     Example

       vector<2x2x2x3xf32> with valid data shape [3, 5], fixed vector sizes
       [2, 3]

                 input: [[a, b, c, d, e],
                         [f, g, h, i, j],
                         [k, l, m, n, o]]

            layout map: (d0, d1) -> (d0 floordiv 3, d1 floordiv 5,
                                     d0 mod 3, d1 mod 5)

         vector layout: [[[[a, b, c],
                           [f, g, h]]
                          [[d, e, -],
                           [i, j, -]]],
                         [[[k, l, m],
                           [-, -, -]]
                          [[n, o, -],
                           [-, -, -]]]]

     Example

       %1 = vector.reshape %0, [%c3, %c6], [%c2, %c9], [4]
         : vector<3x2x4xf32> to vector<2x3x4xf32>

              input: [[a, b, c, d, e, f],
                      [g, h, i, j, k, l],
                      [m, n, o, p, q, r]]

         layout map: (d0, d1) -> (d0, d1 floordiv 4, d1 mod 4)


       Input vector:  [[[a, b, c, d],
                        [e, f, -, -]],
                       [[g, h, i, j],
                        [k, l, -, -]],
                       [[m, n, o, p],
                        [q, r, -, -]]]

       Output vector:  [[[a, b, c, d],
                         [e, f, g, h],
                         [i, -, -, -]],
                        [[j, k, l, m],
                         [n, o, p, q],
                         [r, -, -, -]]]
   }];

   let extraClassDeclaration = [{
     VectorType getInputVectorType() {
       return vector().getType().cast<VectorType>();
     }
     VectorType getOutputVectorType() {
       return getResult().getType().cast<VectorType>();
     }

     /// Returns as integer value the number of input shape operands.
     int64_t getNumInputShapeSizes() { return input_shape().size(); }

     /// Returns as integer value the number of output shape operands.
     int64_t getNumOutputShapeSizes() { return output_shape().size(); }

     void getFixedVectorSizes(SmallVectorImpl<int64_t> &results);

     static StringRef getFixedVectorSizesAttrName() {
       return "fixed_vector_sizes";
     }
     static StringRef getInputShapeAttrName() { return "input_shape"; }
     static StringRef getOutputShapeAttrName() { return "output_shape"; }
   }];

   let assemblyFormat = [{
     $vector `,` `[` $input_shape `]` `,` `[` $output_shape `]` `,`
     $fixed_vector_sizes attr-dict `:` type($vector) `to` type($result)
   }];
 }

 def Vector_StridedSliceOp :
   Vector_Op<"strided_slice", [NoSideEffect,
     PredOpTrait<"operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>]>,
     Arguments<(ins AnyVector:$vector, I64ArrayAttr:$offsets,
                I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>,
     Results<(outs AnyVector)> {
   let summary = "strided_slice operation";
   let description = [{
     Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized
     `sizes` integer array attribute, a k-sized `strides` integer array
     attribute and extracts the n-D subvector at the proper offset.

     At the moment strides must contain only 1s.
     // TODO(ntv) support non-1 strides.

     Returns an n-D vector where the first k-D dimensions match the `sizes`
     attribute. The returned subvector contains the elements starting at offset
     `offsets` and ending at `offsets + sizes`.

     Example:

     ```mlir
     %1 = vector.strided_slice %0
         {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}:
       vector<4x8x16xf32> to vector<2x4x16xf32>

     // TODO(ntv) Evolve to a range form syntax similar to:
     %1 = vector.strided_slice %0[0:2:1][2:4:1]
       vector<4x8x16xf32> to vector<2x4x16xf32>
     ```
   }];
   let builders = [OpBuilder<
     "Builder *builder, OperationState &result, Value source, " #
     "ArrayRef<int64_t> offsets, ArrayRef<int64_t> sizes, " #
     "ArrayRef<int64_t> strides">];
   let extraClassDeclaration = [{
     static StringRef getOffsetsAttrName() { return "offsets"; }
     static StringRef getSizesAttrName() { return "sizes"; }
     static StringRef getStridesAttrName() { return "strides"; }
     VectorType getVectorType(){ return vector().getType().cast<VectorType>(); }
     void getOffsets(SmallVectorImpl<int64_t> &results);
   }];
   let hasCanonicalizer = 1;
   let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)";
 }

 def Vector_TransferReadOp :
   Vector_Op<"transfer_read">,
     Arguments<(ins AnyMemRef:$memref, Variadic<Index>:$indices,
                AffineMapAttr:$permutation_map, AnyType:$padding)>,
     Results<(outs AnyVector:$vector)> {

   let summary = "Reads a supervector from memory into an SSA vector value.";

   let description = [{
     The `vector.transfer_read` op performs a blocking read from a slice within
     a [MemRef](../LangRef.md#memref-type) supplied as its first operand
     into a [vector](../LangRef.md#vector-type) of the same base elemental type.

     A memref operand with vector element type, must have its vector element
     type match a suffix (shape and element type) of the vector (e.g.
     memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>).

     The slice is further defined by a full-rank index within the MemRef,
     supplied as the operands `2 .. 1 + rank(memref)`. The permutation_map
     [attribute](../LangRef.md#attributes) is an
     [affine-map](Affine.md#affine-maps) which specifies the transposition on the
     slice to match the vector shape. The size of the slice is specified by the
     size of the vector, given as the return type. An `ssa-value` of the same
     elemental type as the MemRef is provided as the last operand to specify
     padding in the case of out-of-bounds accesses. This operation is called
     'read' by opposition to 'load' because the super-vector granularity is
     generally not representable with a single hardware register.
     A `vector.transfer_read` is thus a mid-level
     abstraction that supports super-vectorization with non-effecting padding for
     full-tile-only code.

     More precisely, let's dive deeper into the permutation_map for the following
     MLIR:

     ```mlir
     vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
       { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } :
       memref<?x?x?x?xf32>, vector<3x4x5xf32>
     ```

     This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
     %expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice
     is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]`

     That slice needs to be read into a `vector<3x4x5xf32>`. Since the
     permutation map is not full rank, there must be a broadcast along vector
     dimension `1`.

     A notional lowering of vector.transfer_read could generate code resembling:

     ```mlir
     // %expr1, %expr2, %expr3, %expr4 defined before this point
     %tmp = alloc() : vector<3x4x5xf32>
     %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
     for %i = 0 to 3 {
       affine.for %j = 0 to 4 {
         affine.for %k = 0 to 5 {
           %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
             memref<?x?x?x?xf32>
           store %tmp[%i, %j, %k] : vector<3x4x5xf32>
     }}}
     %c0 = constant 0 : index
     %vec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
     ```

     On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that
     the temporary storage footprint is `3 * 5` values but `3 * 4 * 5` values are
     actually transferred between `%A` and `%tmp`.

     Alternatively, if a notional vector broadcast operation were available, the
     lowered code would resemble:

     ```mlir
     // %expr1, %expr2, %expr3, %expr4 defined before this point
     %tmp = alloc() : vector<3x4x5xf32>
     %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
     for %i = 0 to 3 {
       affine.for %k = 0 to 5 {
         %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
           memref<?x?x?x?xf32>
         store %tmp[%i, 0, %k] : vector<3x4x5xf32>
     }}
     %c0 = constant 0 : index
     %tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
     %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32>
     ```

     where `broadcast` broadcasts from element 0 to all others along the
     specified dimension. This time, the temporary storage footprint is `3 * 5`
     values which is the same amount of data as the `3 * 5` values transferred.
     An additional `1` broadcast is required. On a GPU this broadcast could be
     implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`.

     Syntax
     ```
     operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
       `{` attribute-entry `} :` memref-type `,` vector-type
     ```

     Example:

     ```mlir
     // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
     // and pad with %f0 to handle the boundary case:
     %f0 = constant 0.0f : f32
     for %i0 = 0 to %0 {
       affine.for %i1 = 0 to %1 step 256 {
         affine.for %i2 = 0 to %2 step 32 {
           %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
                {permutation_map: (d0, d1, d2) -> (d2, d1)} :
                memref<?x?x?xf32>, vector<32x256xf32>
     }}}

     // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
     // vector<128xf32>. The underlying implementation will require a 1-D vector
     // broadcast:
     for %i0 = 0 to %0 {
       affine.for %i1 = 0 to %1 {
         %3 = vector.transfer_read %A[%i0, %i1]
              {permutation_map: (d0, d1) -> (0)} :
              memref<?x?xf32>, vector<128xf32>
       }
     }

     // Read from a memref with vector element type.
     %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
       {permutation_map = (d0, d1)->(d0, d1)}
         : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
     ```
   }];

   let extraClassDeclaration = [{
     MemRefType getMemRefType() {
       return memref().getType().cast<MemRefType>();
     }
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
   }];
 }

 def Vector_TransferWriteOp :
   Vector_Op<"transfer_write">,
     Arguments<(ins AnyVector:$vector, AnyMemRef:$memref,
                Variadic<Index>:$indices,
                AffineMapAttr:$permutation_map)> {

   let summary = "The vector.transfer_write op writes a supervector to memory.";

   let description = [{
     The `vector.transfer_write` performs a blocking write from a
     [vector](../LangRef.md#vector-type), supplied as its first operand, into a
     slice within a [MemRef](../LangRef.md#memref-type) of the same base
     elemental type, supplied as its second operand.

     A vector memref operand must have its vector element type match a suffix
     (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>,
     vector<1x1x4x3xf32>).

     The slice is further defined by a full-rank index within the MemRef,
     supplied as the operands `3 .. 2 + rank(memref)`.
     The permutation_map [attribute](../LangRef.md#attributes) is an
     [affine-map](Affine.md#affine-maps) which specifies the transposition on the
     slice to match the vector shape. The size of the slice is specified by the
     size of the vector. This operation is called 'write' by opposition to
     'store' because the super-vector granularity is generally not representable
     with a single hardware register. A `vector.transfer_write` is thus a
     mid-level abstraction that supports super-vectorization with non-effecting
     padding for full-tile-only code. It is the responsibility of
     `vector.transfer_write`'s implementation to ensure the memory writes are
     valid. Different lowerings may be pertinent depending on the hardware
     support.

     Example:

     ```mlir
     // write vector<16x32x64xf32> into the slice
     //   `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
     for %i0 = 0 to %0 {
       affine.for %i1 = 0 to %1 step 32 {
         affine.for %i2 = 0 to %2 step 64 {
           affine.for %i3 = 0 to %3 step 16 {
             %val = `ssa-value` : vector<16x32x64xf32>
             vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
               {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
               vector<16x32x64xf32>, memref<?x?x?x?xf32>
     }}}}

     // write to a memref with vector element type.
     vector.transfer_write %4, %arg1[%c3, %c3]
       {permutation_map = (d0, d1)->(d0, d1)}
         : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
     ```
   }];

   let extraClassDeclaration = [{
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
     MemRefType getMemRefType() {
       return memref().getType().cast<MemRefType>();
     }
   }];
   let assemblyFormat = [{
     $vector `,` $memref `[` $indices `]` attr-dict `:` type($vector) `,`
     type($memref)
   }];
 }

 def Vector_ShapeCastOp :
   Vector_Op<"shape_cast", [NoSideEffect]>,
     Arguments<(ins AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$source)>,
     Results<(outs AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$result)> {
   let summary = "shape_cast casts between vector shapes";
   let description = [{
     The shape_cast operation casts between an n-D source vector shape and
     a k-D result vector shape (the element type remains the same).

     If reducing rank (n > k), result dimension sizes must be a product
     of contiguous source dimension sizes.
     If expanding rank (n < k), source dimensions must factor into a
     contiguous sequence of destination dimension sizes.
     Each source dim is expanded (or contiguous sequence of source dims combined)
     in source dimension list order (i.e. 0 <= i < n), to produce a contiguous
     sequence of result dims (or a single result dim), in result dimension list
     order (i.e. 0 <= j < k). The product of all source dimension sizes and all
     result dimension sizes must match.

     If the source/result types are a tuple of vectors, the casting operation
     described above is applied to each source/result tuple element pair.

     It is currently assumed that this operation does not require moving data,
     and that it will be folded away before lowering vector operations.

     There is an exception to the folding expectation when targeting
     llvm.intr.matrix operations. We need a type conversion back and forth from a
     2-D MLIR vector to a 1-D flattened LLVM vector.shape_cast lowering to LLVM
     is supported in that particular case, for now.

     Example:

     ```mlir
     // Example casting to a lower vector rank.
     %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32>

     // Example casting to a higher vector rank.
     %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32>

     // Example casting a tuple of vectors of same rank, where tuple elements
     // may have different shapes.
     %5 = vector.shape_cast %4 : tuple<vector<3x4x2xf32>, vector<3x3x2xf32>> to
                                 tuple<vector<12x2xf32>, vector<9x2xf32>>
     ```
   }];
   let extraClassDeclaration = [{
     VectorType getSourceVectorType() {
       return source().getType().cast<VectorType>();
     }
     VectorType getResultVectorType() {
       return getResult().getType().cast<VectorType>();
     }
   }];
   let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
 }

 def Vector_TypeCastOp :
   Vector_Op<"type_cast", [NoSideEffect]>,
     Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
     Results<(outs AnyMemRef)> {
   let summary = "type_cast op converts a scalar memref to a vector memref";
   let description = [{
     Performs a conversion from a memref with scalar element to a memref with a
     *single* vector element, copying the shape of the memref to the vector. This
     is the minimal viable operation that is required to makeke
     super-vectorization operational. It can be seen as a special case of the
     `view` operation but scoped in the super-vectorization context.

     Syntax:

     ```
     operation ::= `vector.type_cast` ssa-use : memref-type to memref-type
     ```

     Example:

     ```mlir
     %A  = alloc() : memref<5x4x3xf32>
     %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
     ```
   }];

   let builders = [OpBuilder<
     "Builder *builder, OperationState &result, Value source">];

   let parser = [{
     return impl::parseCastOp(parser, result);
   }];

   let extraClassDeclaration = [{
     MemRefType getMemRefType() {
       return memref().getType().cast<MemRefType>();
     }
     MemRefType getResultMemRefType() {
       return getResult().getType().cast<MemRefType>();
     }
   }];
 }

 def Vector_ConstantMaskOp :
   Vector_Op<"constant_mask", [NoSideEffect]>,
     Arguments<(ins I64ArrayAttr:$mask_dim_sizes)>,
     Results<(outs VectorOf<[I1]>)> {
   let summary = "creates a constant vector mask";
   let description = [{
     Creates and returns a vector mask where elements of the result vector
     are set to '0' or '1', based on whether the element indices are contained
     within a hyper-rectangular region specified by the 'mask_dim_sizes'
     array attribute argument. Each element of the 'mask_dim_sizes' array,
     specifies an exclusive upper bound [0, mask-dim-size-element-value)
     for a unique dimension in the vector result. The conjunction of the ranges
     define a hyper-rectangular region within which elements values are set to 1
     (otherwise element values are set to 0).

     Example:

     ```mlir
     // create a constant vector mask of size 4x3xi1 with elements in range
     // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
     %1 = vector.constant_mask [3, 2] : vector<4x3xi1>

     print %1
                   columns
                 0    1    2
               |------------
             0 | 1    1    0
       rows  1 | 1    1    0
             2 | 1    1    0
             3 | 0    0    0
     ```
   }];

   let extraClassDeclaration = [{
     static StringRef getMaskDimSizesAttrName() { return "mask_dim_sizes"; }
   }];
   let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)";
 }

 def Vector_CreateMaskOp :
   Vector_Op<"create_mask", [NoSideEffect]>,
     Arguments<(ins Variadic<Index>:$operands)>, Results<(outs VectorOf<[I1]>)> {
   let summary = "creates a vector mask";
   let description = [{
     Creates and returns a vector mask where elements of the result vector
     are set to '0' or '1', based on whether the element indices are contained
     within a hyper-rectangular region specified by the operands. Specifically,
     each operand specifies a range [0, operand-value) for a unique dimension in
     the vector result. The conjunction of the operand ranges define a
     hyper-rectangular region within which elements values are set to 1
     (otherwise element values are set to 0).

     Example:

     ```mlir
     // create a vector mask of size 4x3xi1 where elements in range
     // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
     %1 = vector.create_mask %c3, %c2 : vector<4x3xi1>

     print %1
                   columns
                 0    1    2
               |------------
             0 | 1    1    0
       rows  1 | 1    1    0
             2 | 1    1    0
             3 | 0    0    0
     ```
   }];

   let hasCanonicalizer = 1;
   let assemblyFormat = "$operands attr-dict `:` type(results)";
 }

 def Vector_TupleOp :
   Vector_Op<"tuple", [NoSideEffect]>,
     Arguments<(ins Variadic<AnyVector>:$vectors)>,
     Results<(outs TupleOf<[AnyVector]>)> {
   let summary = "make tuple of vectors operation";
   let description = [{
     Returns a tuple of its operands 'vectors'.

     Note that this operation is used during the vector op unrolling
     transformation and should be removed before lowering to lower-level
     dialects.


     Example:

     ```mlir
     %0 = vector.transfer_read ... : vector<2x2xf32>
     %1 = vector.transfer_read ... : vector<2x1xf32>
     %2 = vector.transfer_read ... : vector<2x2xf32>
     %3 = vector.transfer_read ... : vector<2x1xf32>

     %4 = vector.tuple %0, %1, %2, %3
       : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>
     ```
   }];

   let extraClassDeclaration = [{
     TupleType getResultTupleType() {
       return getResult().getType().cast<TupleType>();
     }
   }];
 }

 def Vector_TransposeOp :
   Vector_Op<"transpose", [NoSideEffect,
     PredOpTrait<"operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>]>,
     Arguments<(ins AnyVector:$vector, I64ArrayAttr:$transp)>,
     Results<(outs AnyVector:$result)> {
   let summary = "vector transpose operation";
   let description = [{
     Takes a n-D vector and returns the transposed n-D vector defined by
     the permutation of ranks in the n-sized integer array attribute.
     In the operation

     ```mlir
     %1 = vector.transpose %0, [i_1, .., i_n]
       : vector<d_1 x .. x d_n x f32>
       to vector<d_trans[0] x .. x d_trans[n-1] x f32>
     ```

     the transp array [i_1, .., i_n] must be a permutation of [0, .., n-1].

     Example:

     ```mlir
     %1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32>

      [ [a, b, c],       [ [a, d],
        [d, e, f] ]  ->    [b, e],
                           [c, f] ]
     ```
   }];
   let extraClassDeclaration = [{
     VectorType getVectorType() {
       return vector().getType().cast<VectorType>();
     }
     VectorType getResultType() {
       return result().getType().cast<VectorType>();
     }
     void getTransp(SmallVectorImpl<int64_t> &results);
   }];
   let assemblyFormat = [{
     $vector `,` $transp attr-dict `:` type($vector) `to` type($result)
   }];
   let hasCanonicalizer = 1;
   let hasFolder = 1;
 }

 def Vector_TupleGetOp :
   Vector_Op<"tuple_get", [NoSideEffect]>,
     Arguments<(ins TupleOf<[AnyVector]>:$vectors, APIntAttr:$index)>,
     Results<(outs AnyVector)> {
   let summary = "vector tuple get operation";
   let description = [{
     Returns the tuple element of 'vectors' at 'index'.

     Note that this operation is used during the vector op unrolling
     transformation and should be removed before lowering to lower-level
     dialects.

     Example:

     ```mlir
     %4 = vector.tuple %0, %1, %2, %3
       : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>>

     %5 = vector.tuple_get %4, 1
       : tuple<vector<2x2xf32>, vector<2x1xf32>,
               vector<2x2xf32>, vector<2x1xf32>>
     ```
   }];

   let extraClassDeclaration = [{
     VectorType getResultVectorType() {
       return getResult().getType().cast<VectorType>();
     }
     int64_t getIndex() {
       return getAttrOfType<IntegerAttr>("index").getValue().getSExtValue();
     }
     static StringRef getIndexAttrName() { return "index"; }
   }];
   let hasFolder = 1;
 }

 def Vector_PrintOp :
   Vector_Op<"print", []>, Arguments<(ins AnyType:$source)> {
   let summary = "print operation (for testing and debugging)";
   let description = [{
     Prints the source vector (or scalar) to stdout in human readable
     format (for testing and debugging). No return value.

     Example:

     ```mlir
     %0 = constant 0.0 : f32
     %1 = vector.broadcast %0 : f32 to vector<4xf32>
     vector.print %1 : vector<4xf32>

     when lowered to LLVM, the vector print is unrolled into
     elementary printing method calls that at runtime will yield

     ( 0.0, 0.0, 0.0, 0.0 )

     on stdout when linked with a small runtime support library,
     which only needs to provide a few printing methods (single
     value for all data types, opening/closing bracket, comma,
     newline).
     ```
   }];
   let verifier = ?;
   let extraClassDeclaration = [{
     Type getPrintType() {
       return source().getType();
     }
   }];
   let assemblyFormat = "$source attr-dict `:` type($source)";
 }

 //===----------------------------------------------------------------------===//
 // Ops used for supporting progressive lowering and conversion type changes.
 //===----------------------------------------------------------------------===//

 /// Vector dialect matrix multiplication op that operates on flattened 1-D
 /// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR.
 /// This may seem redundant with vector.contract but it serves the purposes of
 /// more progressive lowering and localized type conversion on the path:
 ///   `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
 def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect,
         PredOpTrait<"lhs operand and result have same element type",
                     TCresVTEtIsSameAsOpBase<0, 0>>,
         PredOpTrait<"rhs operand and result have same element type",
                     TCresVTEtIsSameAsOpBase<0, 1>>]>,
       Arguments<(
         // TODO(ntv, fhahn): tighten vector element types that make sense.
         ins VectorOfRankAndType<[1],
               [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$lhs,
             VectorOfRankAndType<[1],
               [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$rhs,
             I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>,
       Results<(
         outs VectorOfRankAndType<[1],
                [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$res)>
 {
   let summary = "Vector matrix multiplication op that operates on flattened 1-D"
     " MLIR vectors";
   let description = [{
     This is the counterpart of llvm.matrix.multiply in MLIR. It serves the
     purposes of more progressive lowering and localized type conversion.

     The ‘vector.matrix_multiply’ op treats `lhs` as matrix with <lhs_rows> rows
     and <lhs_columns> columns, `rhs` as matrix with <lhs_columns> rows and
     <rhs_columns> and multiplies them. The result matrix is returned embedded in
     the result vector.

     Example:

     ```mlir
     %C = vector.matrix_multiply %A, %B
       { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
       (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
     ```
   }];
   let builders = [
    OpBuilder<"Builder *builder, OperationState &result, Value lhs, Value rhs, "
              "unsigned lhsRows, unsigned lhsColumns, unsigned rhsColumns",
    [{
      result.addOperands({lhs, rhs});
      result.addAttribute("lhs_rows", builder->getI32IntegerAttr(lhsRows));
      result.addAttribute("lhs_columns", builder->getI32IntegerAttr(lhsColumns));
      result.addAttribute("rhs_columns", builder->getI32IntegerAttr(rhsColumns));
      result.addTypes(VectorType::get(lhsRows * lhsColumns,
        lhs.getType().cast<VectorType>().getElementType()));
    }]>,
   ];
   let verifier = ?;
   let assemblyFormat = "$lhs `,` $rhs attr-dict "
     "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)";
 }

 #endif // VECTOR_OPS