# mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml - llvm-project - Git at Google

 ### AUTOGENERATED from core_named_ops.py
 ### To regenerate, run: bin/update_core_linalg_named_ops.sh
 --- !LinalgOpConfig
 # matmul: C[d0, d1] += cast(U, A[d0, d2]) * cast(U, B[d2, d1]).
 # d0 and d1 are parallel dimensions; d2 is the reduction dimension.
 metadata: !LinalgOpMetadata
   name: matmul
   cpp_class_name: MatmulOp
   doc: |-
     Performs a matrix multiplication of two 2D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1, s2]: inputs A (s0, s1) and B (s1, s2),
   # output C (s0, s2).
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
   - !LinalgOperandDefConfig
     name: C
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   # Maps are listed in operand order: A, B, C.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # One entry per iteration dimension, in the order d0, d1, d2.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Expression tree for C = add(C, mul(cast(U, A), cast(U, B))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # matmul_unsigned: C[d0, d1] += cast(U, A[d0, d2]) * cast(U, B[d2, d1]),
 # with both casts flagged as unsigned. d2 is the reduction dimension.
 metadata: !LinalgOpMetadata
   name: matmul_unsigned
   cpp_class_name: MatmulUnsignedOp
   doc: |-
     Performs an unsigned matrix multiplication of two 2D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1, s2]: inputs A (s0, s1) and B (s1, s2),
   # output C (s0, s2).
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
   - !LinalgOperandDefConfig
     name: C
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   # Maps are listed in operand order: A, B, C.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # One entry per iteration dimension, in the order d0, d1, d2.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Expression tree for C = add(C, mul(cast(U, A), cast(U, B))); the only
   # difference from the signed matmul entry is is_unsigned_cast: true.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
                 is_unsigned_cast: true
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
                 is_unsigned_cast: true
 --- !LinalgOpConfig
 # quantized_matmul:
 #   C[d0, d1] += (cast(U, A[d0, d2]) - cast(U, AZp))
 #              * (cast(U, B[d2, d1]) - cast(U, BZp))
 # d2 is the reduction dimension. This entry lists no `implements` interface.
 metadata: !LinalgOpMetadata
   name: quantized_matmul
   cpp_class_name: QuantizedMatmulOp
   doc: |-
     Performs a matrix multiplication of two 2D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. The quantized variant
     includes zero-point adjustments for the left and right operands of the
     matmul.
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1, s2]: inputs A (s0, s1) and B (s1, s2),
   # output C (s0, s2). AZp and BZp are I32 inputs with no shape_map.
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
   - !LinalgOperandDefConfig
     name: AZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: BZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: C
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   # Maps are listed in operand order: A, B, AZp, BZp, C. The two zero-point
   # operands use the empty result list `()`.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # One entry per iteration dimension, in the order d0, d1, d2.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Expression tree for
   #   C = add(C, mul(sub(cast(U, A), cast(U, AZp)),
   #                  sub(cast(U, B), cast(U, BZp))))
   # All four casts have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: A
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: AZp
                     is_unsigned_cast: false
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: B
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: BZp
                     is_unsigned_cast: false
 --- !LinalgOpConfig
 # mmt4d:
 #   accum[d0, d1, d3, d4] += cast(AccumType, lhs[d0, d2, d3, d5])
 #                          * cast(AccumType, rhs[d1, d2, d4, d5])
 # d2 and d5 are the reduction dimensions.
 metadata: !LinalgOpMetadata
   name: mmt4d
   cpp_class_name: Mmt4DOp
   doc: |-
     Performs a matrix-matrix-transpose multiplication of two 4D inputs.

     Differences from linalg.matmul:
     * The right hand side is transposed, whence the 't' in 'mmt'.
     * The input and output tensors have a 4D shape instead of a 2D shape. They
       are interpreted as 2D matrices with one level of 2D tile subdivision,
       whence the 2+2=4 dimensions. The inner tile dimensions are identified with
       '0' suffixes below, for instance the LHS matrix shape (M, K, M0, K0) reads
       as: MxK tiles, each of shape M0xK0.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0..s5]: lhs (s0, s1, s2, s3), rhs (s4, s1, s5, s3),
   # accum (s0, s4, s2, s5). lhs and rhs share s1 and s3.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     usage: InputOperand
     type_var: LhsType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2, s3)>
   - !LinalgOperandDefConfig
     name: rhs
     usage: InputOperand
     type_var: RhsType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s5, s3)>
   - !LinalgOperandDefConfig
     name: accum
     usage: OutputOperand
     type_var: AccumType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s4, s2, s5)>
   # Maps are listed in operand order: lhs, rhs, accum. Each map is a plain
   # scalar wrapped over two lines.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d3,
       d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d2, d4,
       d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d3,
       d4)>
   # One entry per iteration dimension, in the order d0..d5.
   iterator_types:
   - parallel
   - parallel
   - reduction
   - parallel
   - parallel
   - reduction
   # Expression tree for
   #   accum = add(accum, mul(cast(AccumType, lhs), cast(AccumType, rhs)))
   # Both casts have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: accum
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: accum
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: AccumType
                 operands:
                 - !ScalarExpression
                   scalar_arg: lhs
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: AccumType
                 operands:
                 - !ScalarExpression
                   scalar_arg: rhs
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # batch_matmul:
 #   C[d0, d1, d2] += cast(U, A[d0, d1, d3]) * cast(U, B[d0, d3, d2])
 # d0 indexes all three operands; d3 is the reduction dimension.
 metadata: !LinalgOpMetadata
   name: batch_matmul
   cpp_class_name: BatchMatmulOp
   doc: |-
     Performs a batched matrix multiplication of two 3D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1, s2, s3]: inputs A (s0, s1, s2) and
   # B (s0, s2, s3), output C (s0, s1, s3).
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
   - !LinalgOperandDefConfig
     name: C
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
   # Maps are listed in operand order: A, B, C.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
   # One entry per iteration dimension, in the order d0..d3.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # Expression tree for C = add(C, mul(cast(U, A), cast(U, B))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # quantized_batch_matmul:
 #   C[d0, d1, d2] += (cast(U, A[d0, d1, d3]) - cast(U, AZp))
 #                  * (cast(U, B[d0, d3, d2]) - cast(U, BZp))
 # d3 is the reduction dimension. This entry lists no `implements` interface.
 metadata: !LinalgOpMetadata
   name: quantized_batch_matmul
   cpp_class_name: QuantizedBatchMatmulOp
   doc: |-
     Performs a batched matrix multiplication of two 3D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. The quantized variant
     includes zero-point adjustments for the left and right operands of the
     matmul.
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1, s2, s3]: inputs A (s0, s1, s2) and
   # B (s0, s2, s3), output C (s0, s1, s3). AZp and BZp are I32 inputs with
   # no shape_map.
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
   - !LinalgOperandDefConfig
     name: AZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: BZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: C
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
   # Maps are listed in operand order: A, B, AZp, BZp, C. The two zero-point
   # operands use the empty result list `()`.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
   # One entry per iteration dimension, in the order d0..d3.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # Expression tree for
   #   C = add(C, mul(sub(cast(U, A), cast(U, AZp)),
   #                  sub(cast(U, B), cast(U, BZp))))
   # All four casts have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: A
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: AZp
                     is_unsigned_cast: false
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: B
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: BZp
                     is_unsigned_cast: false
 --- !LinalgOpConfig
 # matvec: x[d0] += cast(U, A[d0, d1]) * cast(U, y[d1]).
 # Note the naming: y is the input vector and x is the output.
 metadata: !LinalgOpMetadata
   name: matvec
   cpp_class_name: MatvecOp
   doc: |-
     Performs a matrix-vector multiplication.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1]: inputs A (s0, s1) and y (s1), output x (s0).
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: y
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1] -> (s1)>
   - !LinalgOperandDefConfig
     name: x
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1] -> (s0)>
   # Maps are listed in operand order: A, y, x.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d0)>
   # One entry per iteration dimension: d0 is parallel, d1 is the reduction.
   iterator_types:
   - parallel
   - reduction
   # Expression tree for x = add(x, mul(cast(U, A), cast(U, y))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: x
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: x
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: y
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # vecmat: x[d0] += cast(U, y[d1]) * cast(U, A[d1, d0]).
 # Here d0 indexes the output and d1 is the reduction dimension, so the
 # iteration dimensions are in the opposite order to the shape symbols.
 metadata: !LinalgOpMetadata
   name: vecmat
   cpp_class_name: VecmatOp
   doc: |-
     Performs a vector-matrix multiplication.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1]: inputs y (s0) and A (s0, s1), output x (s1).
   args:
   - !LinalgOperandDefConfig
     name: y
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1] -> (s0)>
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: x
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1] -> (s1)>
   # Maps are listed in operand order: y, A, x.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> (d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d1, d0)>
     - affine_map<(d0, d1)[s0, s1] -> (d0)>
   # One entry per iteration dimension: d0 is parallel, d1 is the reduction.
   iterator_types:
   - parallel
   - reduction
   # Expression tree for x = add(x, mul(cast(U, y), cast(U, A))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: x
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: x
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: y
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # batch_matvec: C[d0, d1] += cast(U, A[d0, d1, d2]) * cast(U, B[d0, d2]).
 # d0 indexes all three operands; d2 is the reduction dimension.
 metadata: !LinalgOpMetadata
   name: batch_matvec
   cpp_class_name: BatchMatvecOp
   doc: |-
     Performs a batched matrix-vector multiplication.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1, s2]: inputs A (s0, s1, s2) and B (s0, s2),
   # output C (s0, s1).
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   - !LinalgOperandDefConfig
     name: C
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   # Maps are listed in operand order: A, B, C.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # One entry per iteration dimension, in the order d0, d1, d2.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Expression tree for C = add(C, mul(cast(U, A), cast(U, B))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # dot: C[] += cast(U, A[d0]) * cast(U, B[d0]).
 # The single iteration dimension d0 is a reduction; C has an empty shape.
 metadata: !LinalgOpMetadata
   name: dot
   cpp_class_name: DotOp
   doc: |-
     Performs a dot product of two vectors to a scalar result.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbol [s0]: inputs A (s0) and B (s0), output C ().
   args:
   - !LinalgOperandDefConfig
     name: A
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0] -> (s0)>
   - !LinalgOperandDefConfig
     name: B
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0] -> (s0)>
   - !LinalgOperandDefConfig
     name: C
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0] -> ()>
   # Maps are listed in operand order: A, B, C.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0)[s0] -> (d0)>
     - affine_map<(d0)[s0] -> (d0)>
     - affine_map<(d0)[s0] -> ()>
   # Only one iteration dimension (d0), and it is a reduction.
   iterator_types:
   - reduction
   # Expression tree for C = add(C, mul(cast(U, A), cast(U, B))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_1d: O[d0] += cast(U, I[d0 + d1]) * cast(U, K[d1]).
 # d0 is the parallel output dimension; d1 is the reduction dimension.
 metadata: !LinalgOpMetadata
   name: conv_1d
   cpp_class_name: Conv1DOp
   doc: |-
     Performs 1-D convolution with no channels.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1]: input I (s0 + s1), kernel K (s1),
   # output O (s0).
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1] -> (s0 + s1)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1] -> (s1)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1] -> (s0)>
   # Maps are listed in operand order: I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> (d0 + d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d0)>
   # One entry per iteration dimension: d0 is parallel, d1 is the reduction.
   iterator_types:
   - parallel
   - reduction
   # Expression tree for O = add(O, mul(cast(U, I), cast(U, K))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_2d: O[d0, d1] += cast(U, I[d0 + d2, d1 + d3]) * cast(U, K[d2, d3]).
 # d0 and d1 are parallel; d2 and d3 are the reduction dimensions.
 metadata: !LinalgOpMetadata
   name: conv_2d
   cpp_class_name: Conv2DOp
   doc: |-
     Performs 2-D convolution with no channels.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0, s1, s2, s3]: input I (s0 + s1, s2 + s3),
   # kernel K (s1, s3), output O (s0, s2).
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0 + s1, s2 + s3)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s1, s3)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2)>
   # Maps are listed in operand order: I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 + d2, d1 + d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d2, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1)>
   # One entry per iteration dimension, in the order d0..d3.
   iterator_types:
   - parallel
   - parallel
   - reduction
   - reduction
   # Expression tree for O = add(O, mul(cast(U, I), cast(U, K))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_3d:
 #   O[d0, d1, d2] += cast(U, I[d0 + d3, d1 + d4, d2 + d5])
 #                  * cast(U, K[d3, d4, d5])
 # d0..d2 are parallel; d3..d5 are the reduction dimensions.
 metadata: !LinalgOpMetadata
   name: conv_3d
   cpp_class_name: Conv3DOp
   doc: |-
     Performs 3-D convolution with no channels.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0..s5]: input I (s0 + s1, s2 + s3, s4 + s5),
   # kernel K (s1, s3, s5), output O (s0, s2, s4).
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0 + s1, s2 + s3, s4 + s5)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s1, s3, s5)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s2, s4)>
   # Maps are listed in operand order: I, K, O. The first map is a plain
   # scalar wrapped over two lines.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0 + d3, d1
       + d4, d2 + d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d3, d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per iteration dimension, in the order d0..d5.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # Expression tree for O = add(O, mul(cast(U, I), cast(U, K))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_1d_nwc_wcf:
 #   O[d0, d1, d2] += cast(U, I[d0, d1 * s2 + d3 * s4, d4])
 #                  * cast(U, K[d3, d4, d2])
 # s2 is bound to the `strides` attribute and s4 to `dilations`.
 # d0..d2 are parallel; d3 and d4 are the reduction dimensions.
 metadata: !LinalgOpMetadata
   name: conv_1d_nwc_wcf
   cpp_class_name: Conv1DNwcWcfOp
   doc: |-
     Performs 1-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0..s6]: input I (s0, s1 * s2 + s3 * s4, s5),
   # kernel K (s3, s5, s6), output O (s0, s1, s6). The two IndexAttribute
   # entries carry an attribute_map instead of a shape_map.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
       s5)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s6)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
   # Three maps for the I, K and O operands, in that order; the index
   # attributes have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
       + d3 * s4, d4)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d3, d4, d2)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2)>
   # One entry per iteration dimension, in the order d0..d4.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # Expression tree for O = add(O, mul(cast(U, I), cast(U, K))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_2d_nhwc_hwcf:
 #   O[d0, d1, d2, d3] +=
 #       cast(U, I[d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6])
 #     * cast(U, K[d4, d5, d6, d3])
 # `strides` is bound to symbols (s2, s6) and `dilations` to (s4, s8).
 # d0..d3 are parallel; d4..d6 are the reduction dimensions.
 metadata: !LinalgOpMetadata
   name: conv_2d_nhwc_hwcf
   cpp_class_name: Conv2DNhwcHwcfOp
   doc: |-
     Performs 2-D convolution.

     Layout:
       * Input: NHWC.
       * Kernel: HWCF.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes over symbols [s0..s10]:
   #   I (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)
   #   K (s3, s7, s9, s10)
   #   O (s0, s1, s5, s10)
   # The two IndexAttribute entries carry an attribute_map instead of a
   # shape_map.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
       s7, s9, s10)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s10)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s2,
       s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s4,
       s8)>
   # Three maps for the I, K and O operands, in that order; the index
   # attributes have no indexing map. Each map is a plain scalar wrapped over
   # two lines.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d4, d5, d6, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # One entry per iteration dimension, in the order d0..d6.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # Expression tree for O = add(O, mul(cast(U, I), cast(U, K))); both casts
   # have is_unsigned_cast set to false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_2d_nhwc_hwcf_q accumulates, per the maps and assignment below:
 #   O[n, oh, ow, f] += (cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c]) - cast(U, IZp))
 #                      * (cast(U, K[kh, kw, c, f]) - cast(U, KZp))
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`.
 metadata: !LinalgOpMetadata
   name: conv_2d_nhwc_hwcf_q
   cpp_class_name: Conv2DNhwcHwcfQOp
   doc: |-
     Performs 2-D convolution with zero point offsets.

     Layout:
       * Input: NHWC.
       * Kernel: HWCF.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. This includes the zero
     point offsets common to quantized operations.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (roles read off the shape
   # maps and the NHWC/HWCF layout documented above):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C, s10 = F.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
       s7, s9, s10)>
   # IZp / KZp carry no shape_map and use the empty `-> ()` indexing maps
   # below, i.e. they are read as scalars (input and kernel zero points).
   - !LinalgOperandDefConfig
     name: IZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s10)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s2,
       s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s4,
       s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per non-attribute operand, in args order (I, K, IZp, KZp, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = f, d4 = kh, d5 = kw, d6 = c.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d4, d5, d6, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # d0-d3 index O and are parallel; d4-d6 are absent from the O map and reduce.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(sub(cast(U, I), cast(U, IZp)), sub(cast(U, K), cast(U, KZp))));
   # all four casts have is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
                     is_unsigned_cast: false
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
                     is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_2d_nchw_fchw accumulates, per the maps and assignment below:
 #   O[n, f, oh, ow] += cast(U, I[n, c, oh*SH + kh*DH, ow*SW + kw*DW])
 #                      * cast(U, K[f, c, kh, kw])
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`.
 metadata: !LinalgOpMetadata
   name: conv_2d_nchw_fchw
   cpp_class_name: Conv2DNchwFchwOp
   doc: |-
     Performs 2-D convolution.

     Layout:
       * Input: NCHW.
       * Kernel: FCHW.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (roles read off the shape
   # maps and the NCHW/FCHW layout documented above). Note the numbering
   # differs from the NHWC ops:
   #   s0 = N, s1 = C, s2 = OH, s3 = SH, s4 = KH, s5 = DH, s6 = OW, s7 = SW,
   #   s8 = KW, s9 = DW, s10 = F.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
       s1, s4, s8)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s10, s2, s6)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
       s7)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s5,
       s9)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = f, d2 = oh, d3 = ow, d4 = c, d5 = kh, d6 = kw.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d4, d2 * s3 + d5 * s5, d3 * s7 + d6 * s9)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d1, d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # d0-d3 index O and are parallel; d4-d6 are absent from the O map and reduce.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(cast(U, I), cast(U, K))); both casts have
   # is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # conv_3d_ndhwc_dhwcf accumulates, per the maps and assignment below:
 #   O[n, od, oh, ow, f] +=
 #     cast(U, I[n, od*SD + kd*DD, oh*SH + kh*DH, ow*SW + kw*DW, c])
 #     * cast(U, K[kd, kh, kw, c, f])
 # with (SD, SH, SW) taken from `strides` and (DD, DH, DW) from `dilations`.
 # NOTE(review): unlike the 2-D ops, the doc text has no "Layout:" section;
 # the D/H/W naming here is inferred from the op name (NDHWC / DHWCF).
 metadata: !LinalgOpMetadata
   name: conv_3d_ndhwc_dhwcf
   cpp_class_name: Conv3DNdhwcDhwcfOp
   doc: |-
     Performs 3-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; D/H/W names inferred from the op name):
   #   s0 = N, s1 = OD, s2 = SD, s3 = KD, s4 = DD, s5 = OH, s6 = SH, s7 = KH,
   #   s8 = DH, s9 = OW, s10 = SW, s11 = KW, s12 = DW, s13 = C, s14 = F.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
       s13)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s3, s7, s11, s13, s14)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1, s5, s9, s14)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s2, s6, s10)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s4, s8, s12)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = od, d2 = oh, d3 = ow, d4 = f, d5 = kd,
     # d6 = kh, d7 = kw, d8 = c.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
       * s8, d3 * s10 + d7 * s12, d8)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d4)>
   # d0-d4 index O and are parallel; d5-d8 are absent from the O map and reduce.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(cast(U, I), cast(U, K))); both casts have
   # is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # depthwise_conv_1d_nwc_wc accumulates, per the maps and assignment below:
 #   O[n, ow, c] += cast(U, I[n, ow*SW + kw*DW, c]) * cast(U, K[kw, c])
 # with SW taken from `strides` and DW from `dilations`. The channel dim c
 # indexes I, K and O alike, so it is parallel rather than reduced.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_1d_nwc_wc
   cpp_class_name: DepthwiseConv1DNwcWcOp
   doc: |-
     Performs depth-wise 1-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; the W naming is inferred from the op name, NWC / WC):
   #   s0 = N, s1 = OW, s2 = SW, s3 = KW, s4 = DW, s5 = C.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3, s5)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = ow, d2 = c, d3 = kw.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
       d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # d0-d2 index O and are parallel; d3 is absent from the O map and reduces.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = add(O, mul(cast(U, I), cast(U, K))); both casts have
   # is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # depthwise_conv_2d_nhwc_hwc accumulates, per the maps and assignment below:
 #   O[n, oh, ow, c] += cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c])
 #                      * cast(U, K[kh, kw, c])
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`. The
 # channel dim c indexes I, K and O alike, so it is parallel rather than reduced.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwc
   cpp_class_name: DepthwiseConv2DNhwcHwcOp
   doc: |-
     Performs depth-wise 2-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; H/W names inferred from the op name, NHWC / HWC):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7, s9)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = c, d4 = kh, d5 = kw.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # d0-d3 index O and are parallel; d4-d5 are absent from the O map and reduce.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = add(O, mul(cast(U, I), cast(U, K))); both casts have
   # is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # depthwise_conv_2d_nhwc_hwc_q accumulates, per the maps and assignment below:
 #   O[n, oh, ow, c] += (cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c]) - cast(U, IZp))
 #                      * (cast(U, K[kh, kw, c]) - cast(U, KZp))
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`.
 # NOTE: this file is generated from core_named_ops.py; the doc text below was
 # extended to mention the zero point offsets (matching conv_2d_nhwc_hwcf_q),
 # so mirror the wording in the generator or regeneration will revert it.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwc_q
   cpp_class_name: DepthwiseConv2DNhwcHwcQOp
   doc: |-
     Performs depth-wise 2-D convolution with zero point offsets.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. This includes the zero
     point offsets common to quantized operations.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; H/W names inferred from the op name, NHWC / HWC):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7, s9)>
   # IZp / KZp carry no shape_map and use the empty `-> ()` indexing maps
   # below, i.e. they are read as scalars (input and kernel zero points).
   - !LinalgOperandDefConfig
     name: IZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per non-attribute operand, in args order (I, K, IZp, KZp, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = c, d4 = kh, d5 = kw.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # d0-d3 index O and are parallel; d4-d5 are absent from the O map and reduce.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = add(O, mul(sub(cast(U, I), cast(U, IZp)), sub(cast(U, K), cast(U, KZp))));
   # all four casts have is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
                     is_unsigned_cast: false
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
                     is_unsigned_cast: false
 --- !LinalgOpConfig
 # depthwise_conv_2d_nhwc_hwcm accumulates, per the maps and assignment below:
 #   O[n, oh, ow, c, m] += cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c])
 #                         * cast(U, K[kh, kw, c, m])
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`. The
 # output is rank 5: the trailing kernel dim m is kept as its own output dim.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwcm
   cpp_class_name: DepthwiseConv2DNhwcHwcmOp
   doc: |-
     Performs depth-wise 2-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; names inferred from the op name, NHWC / HWCM, where M is
   # presumably the channel multiplier):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C, s10 = M.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
       s7, s9, s10)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s9, s10)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s2,
       s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s4,
       s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = c, d4 = m, d5 = kh, d6 = kw.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d5, d6, d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3, d4)>
   # d0-d4 index O and are parallel; d5-d6 are absent from the O map and reduce.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = add(O, mul(cast(U, I), cast(U, K))); both casts have
   # is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
                 is_unsigned_cast: false
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
                 is_unsigned_cast: false
 --- !LinalgOpConfig
 # depthwise_conv_2d_nhwc_hwcm_q accumulates, per the maps and assignment below:
 #   O[n, oh, ow, c, m] += (cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c]) - cast(U, IZp))
 #                         * (cast(U, K[kh, kw, c, m]) - cast(U, KZp))
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`.
 # NOTE: this file is generated from core_named_ops.py; the doc text below was
 # extended to mention the zero point offsets (matching conv_2d_nhwc_hwcf_q),
 # so mirror the wording in the generator or regeneration will revert it.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwcm_q
   cpp_class_name: DepthwiseConv2DNhwcHwcmQOp
   doc: |-
     Performs depth-wise 2-D convolution with zero point offsets.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. This includes the zero
     point offsets common to quantized operations.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; names inferred from the op name, NHWC / HWCM, where M is
   # presumably the channel multiplier):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C, s10 = M.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
       s7, s9, s10)>
   # IZp / KZp carry no shape_map and use the empty `-> ()` indexing maps
   # below, i.e. they are read as scalars (input and kernel zero points).
   - !LinalgOperandDefConfig
     name: IZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s9, s10)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s2,
       s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s4,
       s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per non-attribute operand, in args order (I, K, IZp, KZp, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = c, d4 = m, d5 = kh, d6 = kw.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d5, d6, d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3, d4)>
   # d0-d4 index O and are parallel; d5-d6 are absent from the O map and reduce.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = add(O, mul(sub(cast(U, I), cast(U, IZp)), sub(cast(U, K), cast(U, KZp))));
   # all four casts have is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_apply:
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
                     is_unsigned_cast: false
             - !ScalarExpression
               scalar_apply:
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                     is_unsigned_cast: false
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
                     is_unsigned_cast: false
 --- !LinalgOpConfig
 # pooling_nhwc_sum accumulates, per the maps and assignment below:
 #   O[n, oh, ow, c] += cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c])
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`.
 # K is indexed as K[kh, kw] but never appears in the assignment; within this
 # definition only its shape (s3, s7) ties it to the window dims.
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_sum
   cpp_class_name: PoolingNhwcSumOp
   doc: |-
     Performs sum pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; H/W names inferred from the "nhwc" in the op name):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = kh, d4 = kw, d5 = c.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d3 * s4, d2 * s6 + d4 * s8, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d5)>
   # The reduction dims sit in the middle here: d3-d4 (window) are absent from
   # the O map and reduce, while d0-d2 and d5 index O and are parallel.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - parallel
   # O = add(O, cast(U, I)); the cast has is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: false
 --- !LinalgOpConfig
 # pooling_nhwc_max updates, per the maps and assignment below:
 #   O[n, oh, ow, c] = max(O[n, oh, ow, c],
 #                         cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c]))
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`.
 # K is indexed as K[kh, kw] but never appears in the assignment; within this
 # definition only its shape (s3, s7) ties it to the window dims.
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_max
   cpp_class_name: PoolingNhwcMaxOp
   doc: |-
     Performs max pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; H/W names inferred from the "nhwc" in the op name):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = kh, d4 = kw, d5 = c.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d3 * s4, d2 * s6 + d4 * s8, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d5)>
   # The reduction dims sit in the middle here: d3-d4 (window) are absent from
   # the O map and reduce, while d0-d2 and d5 index O and are parallel.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - parallel
   # O = max(O, cast(U, I)); the cast has is_unsigned_cast: false.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: max
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: false
 --- !LinalgOpConfig
 # pooling_nhwc_max_unsigned updates, per the maps and assignment below:
 #   O[n, oh, ow, c] = max_unsigned(O[n, oh, ow, c],
 #                                  cast(U, I[n, oh*SH + kh*DH, ow*SW + kw*DW, c]))
 # with (SH, SW) taken from `strides` and (DH, DW) from `dilations`.
 # It differs from pooling_nhwc_max in two places: fn_name is max_unsigned and
 # the input cast sets is_unsigned_cast: true.
 # K is indexed as K[kh, kw] but never appears in the assignment; within this
 # definition only its shape (s3, s7) ties it to the window dims.
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_max_unsigned
   cpp_class_name: PoolingNhwcMaxUnsignedOp
   doc: |-
     Performs unsigned max pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shape symbols shared by every map in this op (positions read off the
   # shape maps; H/W names inferred from the "nhwc" in the op name):
   #   s0 = N, s1 = OH, s2 = SH, s3 = KH, s4 = DH, s5 = OW, s6 = SW, s7 = KW,
   #   s8 = DW, s9 = C.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   indexing_maps: !LinalgIndexingMapsConfig
     # One map per tensor operand, in args order (I, K, O).
     # Iteration dims: d0 = n, d1 = oh, d2 = ow, d3 = kh, d4 = kw, d5 = c.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d3 * s4, d2 * s6 + d4 * s8, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d5)>
   # The reduction dims sit in the middle here: d3-d4 (window) are absent from
   # the O map and reduce, while d0-d2 and d5 index O and are parallel.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - parallel
   # O = max_unsigned(O, cast(U, I)); the cast has is_unsigned_cast: true.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: max_unsigned
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: true
 --- !LinalgOpConfig
 # pooling_nchw_max: folds I into O with `max` over a two-dimensional window
 # whose extents come from the shape of K. Unlike the nhwc pooling ops in this
 # file, the windowed dimensions are the LAST two dimensions of I and O.
 # NOTE(review): reading "nchw" as (batch, channel, height, width) is inferred
 # from the op name only - confirm against core_named_ops.py.
 metadata: !LinalgOpMetadata
   name: pooling_nchw_max
   cpp_class_name: PoolingNchwMaxOp
   doc: |-
     Performs max pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbol legend, as used by the shape/attribute maps below (note that the
   # numbering differs from the nhwc ops):
   #   s0, s1 - first two extents, shared unchanged by I and O
   #   s2, s6 - the last two extents of O
   #   s4, s8 - extents of K (the pooling window)
   #   s3, s7 - values of the `strides` attribute
   #   s5, s9 - values of the `dilations` attribute
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
       * s3 + s4 * s5, s6 * s7 + s8 * s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
       s6)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5, s9)>
   # One indexing map per shaped operand, in declaration order: I, K, O.
   # I is read at (output index * stride + window index * dilation) in its two
   # trailing dimensions.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2 * s3 + d4 * s5, d3 * s7 + d5 * s9)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # Iterator kinds for d0..d5 in order. d4 and d5 (the dims that index K and do
   # not appear in the O map) are the reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = max(O, cast<U>(I)), with a signed cast.
   # K does not appear in the expression; it is referenced only through its
   # shape map and indexing map.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: max
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: false
 --- !LinalgOpConfig
 # pooling_nhwc_min: folds I into O with `min` over a two-dimensional window
 # whose extents come from the shape of K.
 # NOTE(review): reading "nhwc" as (batch, height, width, channel) is inferred
 # from the op name only - confirm against core_named_ops.py.
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_min
   cpp_class_name: PoolingNhwcMinOp
   doc: |-
     Performs min pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbol legend, as used by the shape/attribute maps below:
   #   s0, s9 - first and last extents, shared unchanged by I and O
   #   s1, s5 - the two middle extents of O
   #   s3, s7 - extents of K (the pooling window)
   #   s2, s6 - values of the `strides` attribute
   #   s4, s8 - values of the `dilations` attribute
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   # One indexing map per shaped operand, in declaration order: I, K, O.
   # I is read at (output index * stride + window index * dilation) in its two
   # middle dimensions.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d3 * s4, d2 * s6 + d4 * s8, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d5)>
   # Iterator kinds for d0..d5 in order. d3 and d4 (the dims that index K and do
   # not appear in the O map) are the reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - parallel
   # O = min(O, cast<U>(I)), with a signed cast.
   # K does not appear in the expression; it is referenced only through its
   # shape map and indexing map.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: min
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: false
 --- !LinalgOpConfig
 # pooling_nhwc_min_unsigned: folds I into O with `min_unsigned` over a
 # two-dimensional window whose extents come from the shape of K.
 # NOTE(review): reading "nhwc" as (batch, height, width, channel) is inferred
 # from the op name only - confirm against core_named_ops.py.
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_min_unsigned
   cpp_class_name: PoolingNhwcMinUnsignedOp
   doc: |-
     Performs unsigned min pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbol legend, as used by the shape/attribute maps below:
   #   s0, s9 - first and last extents, shared unchanged by I and O
   #   s1, s5 - the two middle extents of O
   #   s3, s7 - extents of K (the pooling window)
   #   s2, s6 - values of the `strides` attribute
   #   s4, s8 - values of the `dilations` attribute
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, s6)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   # One indexing map per shaped operand, in declaration order: I, K, O.
   # I is read at (output index * stride + window index * dilation) in its two
   # middle dimensions.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d3 * s4, d2 * s6 + d4 * s8, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d5)>
   # Iterator kinds for d0..d5 in order. d3 and d4 (the dims that index K and do
   # not appear in the O map) are the reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - parallel
   # O = min_unsigned(O, cast<U>(I)), with the cast flagged as unsigned.
   # K does not appear in the expression; it is referenced only through its
   # shape map and indexing map.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: min_unsigned
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: true
 --- !LinalgOpConfig
 # pooling_ndhwc_sum: accumulates I into O with `add` over a three-dimensional
 # window whose extents come from the shape of K.
 # NOTE(review): reading "ndhwc" as (batch, depth, height, width, channel) is
 # inferred from the op name only - confirm against core_named_ops.py.
 metadata: !LinalgOpMetadata
   name: pooling_ndhwc_sum
   cpp_class_name: PoolingNdhwcSumOp
   doc: |-
     Performs 3D sum pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbol legend, as used by the shape/attribute maps below:
   #   s0, s13     - first and last extents, shared unchanged by I and O
   #   s1, s5, s9  - the three middle extents of O
   #   s3, s7, s11 - extents of K (the pooling window)
   #   s2, s6, s10 - values of the `strides` attribute
   #   s4, s8, s12 - values of the `dilations` attribute
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s3, s7, s11)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1, s5, s9, s13)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s2, s6, s10)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s4, s8, s12)>
   # One indexing map per shaped operand, in declaration order: I, K, O.
   # I is read at (output index * stride + window index * dilation) in its three
   # middle dimensions.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3
       * s10 + d6 * s12, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d7)>
   # Iterator kinds for d0..d7 in order. d4, d5 and d6 (the dims that index K
   # and do not appear in the O map) are the reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - parallel
   # O = add(O, cast<U>(I)), with a signed cast.
   # K does not appear in the expression; it is referenced only through its
   # shape map and indexing map.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: false
 --- !LinalgOpConfig
 # pooling_ndhwc_max: folds I into O with `max` over a three-dimensional window
 # whose extents come from the shape of K.
 # NOTE(review): reading "ndhwc" as (batch, depth, height, width, channel) is
 # inferred from the op name only - confirm against core_named_ops.py.
 metadata: !LinalgOpMetadata
   name: pooling_ndhwc_max
   cpp_class_name: PoolingNdhwcMaxOp
   doc: |-
     Performs 3D max pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbol legend, as used by the shape/attribute maps below:
   #   s0, s13     - first and last extents, shared unchanged by I and O
   #   s1, s5, s9  - the three middle extents of O
   #   s3, s7, s11 - extents of K (the pooling window)
   #   s2, s6, s10 - values of the `strides` attribute
   #   s4, s8, s12 - values of the `dilations` attribute
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s3, s7, s11)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1, s5, s9, s13)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s2, s6, s10)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s4, s8, s12)>
   # One indexing map per shaped operand, in declaration order: I, K, O.
   # I is read at (output index * stride + window index * dilation) in its three
   # middle dimensions.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3
       * s10 + d6 * s12, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d7)>
   # Iterator kinds for d0..d7 in order. d4, d5 and d6 (the dims that index K
   # and do not appear in the O map) are the reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - parallel
   # O = max(O, cast<U>(I)), with a signed cast.
   # K does not appear in the expression; it is referenced only through its
   # shape map and indexing map.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: max
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: false
 --- !LinalgOpConfig
 # pooling_ndhwc_min: folds I into O with `min` over a three-dimensional window
 # whose extents come from the shape of K.
 # NOTE(review): reading "ndhwc" as (batch, depth, height, width, channel) is
 # inferred from the op name only - confirm against core_named_ops.py.
 metadata: !LinalgOpMetadata
   name: pooling_ndhwc_min
   cpp_class_name: PoolingNdhwcMinOp
   doc: |-
     Performs 3D min pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbol legend, as used by the shape/attribute maps below:
   #   s0, s13     - first and last extents, shared unchanged by I and O
   #   s1, s5, s9  - the three middle extents of O
   #   s3, s7, s11 - extents of K (the pooling window)
   #   s2, s6, s10 - values of the `strides` attribute
   #   s4, s8, s12 - values of the `dilations` attribute
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s3, s7, s11)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1, s5, s9, s13)>
   - !LinalgOperandDefConfig
     name: strides
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s2, s6, s10)>
   - !LinalgOperandDefConfig
     name: dilations
     usage: IndexAttribute
     type_var: I64
     attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s4, s8, s12)>
   # One indexing map per shaped operand, in declaration order: I, K, O.
   # I is read at (output index * stride + window index * dilation) in its three
   # middle dimensions.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3
       * s10 + d6 * s12, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d7)>
   # Iterator kinds for d0..d7 in order. d4, d5 and d6 (the dims that index K
   # and do not appear in the O map) are the reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - parallel
   # O = min(O, cast<U>(I)), with a signed cast.
   # K does not appear in the expression; it is referenced only through its
   # shape map and indexing map.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: min
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           symbolic_cast:
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
             is_unsigned_cast: false
 --- !LinalgOpConfig
 # fill_rng_2d: writes one pseudo random value per element of the 2-D output O,
 # derived from the element's two indices, the scalar `seed`, and the scalar
 # bounds `min` / `max`.
 # NOTE: this file is autogenerated from core_named_ops.py; the wording fix in
 # `doc` below ("generations" -> "generates") must also be made there or it
 # will be lost on regeneration.
 metadata: !LinalgOpMetadata
   name: fill_rng_2d
   cpp_class_name: FillRng2DOp
   doc: |-
     Fills the output tensor with pseudo random numbers.

     The operation generates pseudo random numbers using a linear congruential
     generator. It provides no guarantees regarding the distribution of the
     generated random numbers. Instead of generating the random numbers
     sequentially, it instantiates one random number generator per data element
     and runs them in parallel. The seed operand and the indices of the data
     element seed the random number generation. The min and max operands limit
     the range of the generated random numbers.
 structured_op: !LinalgStructuredOpConfig
   # min, max and seed declare no shape_map and use the empty indexing map
   # `-> ()` below; only O is shaped (s0 x s1).
   args:
   - !LinalgOperandDefConfig
     name: min
     usage: InputOperand
     type_var: F64
   - !LinalgOperandDefConfig
     name: max
     usage: InputOperand
     type_var: F64
   - !LinalgOperandDefConfig
     name: seed
     usage: InputOperand
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: T
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   # One indexing map per operand, in declaration order: min, max, seed, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> ()>
     - affine_map<(d0, d1)[s0, s1] -> ()>
     - affine_map<(d0, d1)[s0, s1] -> ()>
     - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
   # Both iteration dims are parallel: no element depends on another.
   iterator_types:
   - parallel
   - parallel
   # The expression tree below, written out as a formula (i0 / i1 stand for
   # scalar_index 0 / 1; every constant is cast to the type shown):
   #   r1 = (I32(i0) + seed) * 1103515245 + 12345
   #   r2 = (I32(i1) + r1)   * 1103515245 + 12345
   #   O  = T((F64(2147483647) + F64(r2))
   #          * ((max - min) * F64(2.3283063999999999E-10)) + min)
   # i.e. the same multiply-and-add step (multiplier 1103515245, increment
   # 12345) is applied twice, once per index, and the result is offset and
   # scaled using min and max.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       symbolic_cast:
         type_var: T
         operands:
         - !ScalarExpression
           scalar_apply:
             fn_name: add
             operands:
             - !ScalarExpression
               scalar_apply:
                 fn_name: mul
                 operands:
                 - !ScalarExpression
                   scalar_apply:
                     fn_name: add
                     operands:
                     - !ScalarExpression
                       symbolic_cast:
                         type_var: F64
                         operands:
                         - !ScalarExpression
                           scalar_const: '2147483647 : i64'
                         is_unsigned_cast: false
                     - !ScalarExpression
                       symbolic_cast:
                         type_var: F64
                         operands:
                         - !ScalarExpression
                           scalar_apply:
                             fn_name: add
                             operands:
                             - !ScalarExpression
                               scalar_apply:
                                 fn_name: mul
                                 operands:
                                 - !ScalarExpression
                                   scalar_apply:
                                     fn_name: add
                                     operands:
                                     - !ScalarExpression
                                       symbolic_cast:
                                         type_var: I32
                                         operands:
                                         - !ScalarExpression
                                           scalar_index: 1
                                         is_unsigned_cast: false
                                     - !ScalarExpression
                                       scalar_apply:
                                         fn_name: add
                                         operands:
                                         - !ScalarExpression
                                           scalar_apply:
                                             fn_name: mul
                                             operands:
                                             - !ScalarExpression
                                               scalar_apply:
                                                 fn_name: add
                                                 operands:
                                                 - !ScalarExpression
                                                   symbolic_cast:
                                                     type_var: I32
                                                     operands:
                                                     - !ScalarExpression
                                                       scalar_index: 0
                                                     is_unsigned_cast: false
                                                 - !ScalarExpression
                                                   scalar_arg: seed
                                             - !ScalarExpression
                                               symbolic_cast:
                                                 type_var: I32
                                                 operands:
                                                 - !ScalarExpression
                                                   scalar_const: '1103515245 : i64'
                                                 is_unsigned_cast: false
                                         - !ScalarExpression
                                           symbolic_cast:
                                             type_var: I32
                                             operands:
                                             - !ScalarExpression
                                               scalar_const: '12345 : i64'
                                             is_unsigned_cast: false
                                 - !ScalarExpression
                                   symbolic_cast:
                                     type_var: I32
                                     operands:
                                     - !ScalarExpression
                                       scalar_const: '1103515245 : i64'
                                     is_unsigned_cast: false
                             - !ScalarExpression
                               symbolic_cast:
                                 type_var: I32
                                 operands:
                                 - !ScalarExpression
                                   scalar_const: '12345 : i64'
                                 is_unsigned_cast: false
                         is_unsigned_cast: false
                 - !ScalarExpression
                   scalar_apply:
                     fn_name: mul
                     operands:
                     - !ScalarExpression
                       scalar_apply:
                         fn_name: sub
                         operands:
                         - !ScalarExpression
                           scalar_arg: max
                         - !ScalarExpression
                           scalar_arg: min
                     - !ScalarExpression
                       symbolic_cast:
                         type_var: F64
                         operands:
                         - !ScalarExpression
                           scalar_const: '2.3283063999999999E-10 : f64'
                         is_unsigned_cast: false
             - !ScalarExpression
               scalar_arg: min
         is_unsigned_cast: false
 --- !LinalgOpConfig
 # soft_plus_2d: element-wise op over a 2-D input I and a same-shaped output O,
 # computing log(1 + exp(I)) after casting I to the output type.
 metadata: !LinalgOpMetadata
   name: soft_plus_2d
   cpp_class_name: SoftPlus2DOp
   doc: |-
     Implements the soft plus operator.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
 structured_op: !LinalgStructuredOpConfig
   # I and O share the same (s0, s1) shape; their element types (T and U) are
   # independent type variables.
   args:
   - !LinalgOperandDefConfig
     name: I
     usage: InputOperand
     type_var: T
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
     type_var: U
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   # One indexing map per operand, in declaration order: I, O. Both are the
   # identity on (d0, d1).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
   # Both iteration dims are parallel; there is no reduction.
   iterator_types:
   - parallel
   - parallel
   # O = log(cast<U>(1.0) + exp(cast<U>(I))), both casts signed.
   # Unlike the pooling ops in this file, the previous value of O is not read.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_apply:
         fn_name: log
         operands:
         - !ScalarExpression
           scalar_apply:
             fn_name: add
             operands:
             - !ScalarExpression
               symbolic_cast:
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_const: '1.000000e+00 : f64'
                 is_unsigned_cast: false
             - !ScalarExpression
               scalar_apply:
                 fn_name: exp
                 operands:
                 - !ScalarExpression
                   symbolic_cast:
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                     is_unsigned_cast: false