# mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml - llvm-project - Git at Google

 ### AUTOGENERATED from core_named_ops.py
 ### To regenerate, run: bin/update_core_linalg_named_ops.sh
 --- !LinalgOpConfig
 # Named op `copy` (cpp_class_name CopyOp): O = cast(U, I), where the cast
 # function is chosen by the `cast` attribute (default_fn: cast_signed).
 metadata: !LinalgOpMetadata
   name: copy
   cpp_class_name: CopyOp
   doc: |-
     Copies the tensor elementwise.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   defines:
   - hasCanonicalizer
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I (type_var T1), output O (type_var U), plus the `cast`
   # type-function attribute.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: cast
     kind: type_fn_attr
     default_fn: cast_signed
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = cast(U, I): the type function named by attribute `cast` converts I to U.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: type
         attr_name: cast
         type_var: U
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `elemwise_unary` (cpp_class_name ElemwiseUnaryOp):
 # O = fun(cast(U, I)). Both the unary function and the cast are attributes
 # (defaults: fun = exp, cast = cast_signed).
 metadata: !LinalgOpMetadata
   name: elemwise_unary
   cpp_class_name: ElemwiseUnaryOp
   doc: |-
     Applies the unary function fun elementwise.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I (T1), output O (U), then the two function attributes
   # `fun` (unary_fn_attr) and `cast` (type_fn_attr).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: fun
     kind: unary_fn_attr
     default_fn: exp
   - !LinalgOperandDefConfig
     name: cast
     kind: type_fn_attr
     default_fn: cast_signed
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = fun(cast(U, I)): the input is cast to U first, then `fun` is applied.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         attr_name: fun
         operands:
         - !ScalarExpression
           scalar_fn:
             kind: type
             attr_name: cast
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # Named op `exp` (cpp_class_name ExpOp): O = exp(I). Input and output share
 # type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: exp
   cpp_class_name: ExpOp
   doc: |-
     Applies exp(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = exp(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: exp
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `log` (cpp_class_name LogOp): O = log(I). Input and output share
 # type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: log
   cpp_class_name: LogOp
   doc: |-
     Applies log(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = log(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: log
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `abs` (cpp_class_name AbsOp): O = abs(I). Input and output share
 # type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: abs
   cpp_class_name: AbsOp
   doc: |-
     Applies abs(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = abs(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: abs
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `ceil` (cpp_class_name CeilOp): O = ceil(I). Input and output share
 # type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: ceil
   cpp_class_name: CeilOp
   doc: |-
     Applies ceil(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = ceil(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: ceil
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `floor` (cpp_class_name FloorOp): O = floor(I). Input and output
 # share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: floor
   cpp_class_name: FloorOp
   doc: |-
     Applies floor(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = floor(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: floor
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `negf` (cpp_class_name NegfOp): O = negf(I). Input and output share
 # type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: negf
   cpp_class_name: NegfOp
   doc: |-
     Applies negf(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = negf(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: negf
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `reciprocal` (cpp_class_name ReciprocalOp): O = reciprocal(I).
 # Input and output share type variable T1; the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: reciprocal
   cpp_class_name: ReciprocalOp
   doc: |-
     Applies reciprocal(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = reciprocal(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: reciprocal
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `round` (cpp_class_name RoundOp): O = round(I). Input and output
 # share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: round
   cpp_class_name: RoundOp
   doc: |-
     Applies round(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = round(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: round
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `sqrt` (cpp_class_name SqrtOp): O = sqrt(I). Input and output share
 # type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: sqrt
   cpp_class_name: SqrtOp
   doc: |-
     Applies sqrt(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = sqrt(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: sqrt
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `rsqrt` (cpp_class_name RsqrtOp): O = rsqrt(I). Input and output
 # share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: rsqrt
   cpp_class_name: RsqrtOp
   doc: |-
     Applies rsqrt(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = rsqrt(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: rsqrt
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `square` (cpp_class_name SquareOp): O = square(I). Input and output
 # share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: square
   cpp_class_name: SquareOp
   doc: |-
     Applies square(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = square(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: square
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `tanh` (cpp_class_name TanhOp): O = tanh(I). Input and output share
 # type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: tanh
   cpp_class_name: TanhOp
   doc: |-
     Applies tanh(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = tanh(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: tanh
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `erf` (cpp_class_name erfOp): O = erf(I). Input and output share
 # type variable T1, and the expression contains no cast.
 # NOTE(review): `erfOp` starts with a lower-case letter, unlike the other
 # cpp_class_name values in this file (ExpOp, LogOp, TanhOp, ...). Renaming it
 # would change the generated class name, so confirm against
 # core_named_ops.py and existing users before touching it.
 metadata: !LinalgOpMetadata
   name: erf
   cpp_class_name: erfOp
   doc: |-
     Applies erf(x) elementwise.

     No numeric casting is performed on the input operand.
 structured_op: !LinalgStructuredOpConfig
   # Operands: input I and output O, both with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (I, O); the iterator list is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = erf(I): fixed unary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: unary
         fn_name: erf
         operands:
         - !ScalarExpression
           scalar_arg: I
 --- !LinalgOpConfig
 # Named op `elemwise_binary` (cpp_class_name ElemwiseBinaryOp):
 # O = fun(cast(U, lhs), cast(U, rhs)). Both the binary function and the cast
 # are attributes (defaults: fun = add, cast = cast_signed).
 # NOTE: this file is generated from core_named_ops.py; the doc wording below
 # (plural "operands", since both inputs are cast) must be mirrored there or
 # regeneration will revert it.
 metadata: !LinalgOpMetadata
   name: elemwise_binary
   cpp_class_name: ElemwiseBinaryOp
   doc: |-
     Applies the binary function fun elementwise.

     Numeric casting is performed on the input operands, promoting them to the
     same data type as the accumulator/output.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs (T1) and rhs (T2), output O (U), then the two
   # function attributes `fun` (binary_fn_attr) and `cast` (type_fn_attr).
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: fun
     kind: binary_fn_attr
     default_fn: add
   - !LinalgOperandDefConfig
     name: cast
     kind: type_fn_attr
     default_fn: cast_signed
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = fun(cast(U, lhs), cast(U, rhs)): each input is cast to U separately
   # before `fun` is applied.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         attr_name: fun
         operands:
         - !ScalarExpression
           scalar_fn:
             kind: type
             attr_name: cast
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: lhs
         - !ScalarExpression
           scalar_fn:
             kind: type
             attr_name: cast
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `add` (cpp_class_name AddOp): O = add(lhs, rhs). All three tensor
 # operands share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: add
   cpp_class_name: AddOp
   doc: |-
     Adds two tensors elementwise.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.add` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = add(lhs, rhs): fixed binary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `sub` (cpp_class_name SubOp): O = sub(lhs, rhs). All three tensor
 # operands share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: sub
   cpp_class_name: SubOp
   doc: |-
     Subtracts two tensors elementwise.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.sub` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = sub(lhs, rhs): operand order matters, lhs is listed first.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: sub
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `mul` (cpp_class_name MulOp): O = mul(lhs, rhs). All three tensor
 # operands share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: mul
   cpp_class_name: MulOp
   doc: |-
     Multiplies two tensors elementwise.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.mul` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = mul(lhs, rhs): fixed binary function (fn_name), not an attribute.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: mul
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `div` (cpp_class_name DivOp): O = div(lhs, rhs). All three tensor
 # operands share type variable T1, and the expression contains no cast.
 metadata: !LinalgOpMetadata
   name: div
   cpp_class_name: DivOp
   doc: |-
     Divides the first tensor by the second tensor, elementwise.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.div` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = div(lhs, rhs): lhs is the dividend (first operand), rhs the divisor.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: div
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `div_unsigned` (cpp_class_name DivUnsignedOp):
 # O = div_unsigned(lhs, rhs). All three tensor operands share type variable
 # T1, and the expression contains no cast.
 # NOTE: this file is generated from core_named_ops.py; the doc example below
 # now names `linalg.div_unsigned` (it previously said `linalg.div`), and that
 # wording must be mirrored in the generator or regeneration will revert it.
 metadata: !LinalgOpMetadata
   name: div_unsigned
   cpp_class_name: DivUnsignedOp
   doc: |-
     Divides the first tensor by the second tensor, elementwise. For integer
     types, performs an unsigned division.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.div_unsigned` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = div_unsigned(lhs, rhs): lhs is the dividend (first operand), rhs the
   # divisor.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: div_unsigned
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `max` (cpp_class_name MaxOp): O = max_signed(lhs, rhs). The op is
 # named `max`, but the scalar function it applies is `max_signed`. All three
 # tensor operands share type variable T1.
 metadata: !LinalgOpMetadata
   name: max
   cpp_class_name: MaxOp
   doc: |-
     Takes the max (signed) between two inputs, elementwise.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.max` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = max_signed(lhs, rhs): fixed binary function (fn_name).
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_signed
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `min` (cpp_class_name MinOp): O = min_signed(lhs, rhs). The op is
 # named `min`, but the scalar function it applies is `min_signed`. All three
 # tensor operands share type variable T1.
 metadata: !LinalgOpMetadata
   name: min
   cpp_class_name: MinOp
   doc: |-
     Takes the min (signed) between two inputs, elementwise.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.min` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = min_signed(lhs, rhs): fixed binary function (fn_name).
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: min_signed
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `powf` (cpp_class_name PowFOp): O = powf(lhs, rhs). All three
 # tensor operands share type variable T1, and the expression contains no
 # cast.
 metadata: !LinalgOpMetadata
   name: powf
   cpp_class_name: PowFOp
   doc: |-
     Takes the powf(lhs, rhs) between two inputs, elementwise. For powf(arg, 2) use `linalg.square`.

     Only applies to floating point values.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.powf` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs lhs and rhs, output O, all with type_var T1.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (lhs, rhs, O); the iterator list is
   # empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = powf(lhs, rhs): lhs is passed first and rhs second.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: powf
         operands:
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `select` (cpp_class_name SelectOp): O = select(cond, lhs, rhs).
 # The condition has its own type variable U; lhs, rhs and O share T1.
 metadata: !LinalgOpMetadata
   name: select
   cpp_class_name: SelectOp
   doc: |-
     Chooses one value based on a binary condition supplied as its first operand.

     The shapes and element types must be identical. The appropriate casts,
     broadcasts and reductions should be done previously to calling this op.

     This means reduction/broadcast/element cast semantics is explicit. Further
     passes can take that into account when lowering this code. For example,
     a `linalg.broadcast` + `linalg.select` sequence can be lowered to a
     `linalg.generic` with different affine maps for the two operands.
 structured_op: !LinalgStructuredOpConfig
   # Operands: inputs cond (type_var U), lhs and rhs (T1), output O (T1).
   args:
   - !LinalgOperandDefConfig
     name: cond
     kind: input_tensor
     type_var: U
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T1
     shape_map: affine_map<() -> ()>
   # One indexing map per tensor operand (cond, lhs, rhs, O); the iterator list
   # is empty.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   iterator_types: []
   # O = select(cond, lhs, rhs): the only ternary scalar function in this part
   # of the file; operand order is cond, lhs, rhs.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: ternary
         fn_name: select
         operands:
         - !ScalarExpression
           scalar_arg: cond
         - !ScalarExpression
           scalar_arg: lhs
         - !ScalarExpression
           scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `matmul` (cpp_class_name MatmulOp):
 # C[d0, d1] = C[d0, d1] + cast(U, A[d0, d2]) * cast(U, B[d2, d1]), with d2 as
 # the reduction dimension. The cast function is chosen by the `cast`
 # attribute (default_fn: cast_signed).
 metadata: !LinalgOpMetadata
   name: matmul
   cpp_class_name: MatmulOp
   doc: |-
     Performs a matrix multiplication of two 2D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operands: A is (s0, s1), B is (s1, s2), C is (s0, s2) in terms of the
   # shape symbols; A, B and C use type variables T1, T2 and U respectively.
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   - !LinalgOperandDefConfig
     name: cast
     kind: type_fn_attr
     default_fn: cast_signed
   # Indexing maps in operand order: A -> (d0, d2), B -> (d2, d1),
   # C -> (d0, d1).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # Iterator kinds for d0, d1, d2 in that order; only d2 is a reduction.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # C = add(C, mul(cast(U, A), cast(U, B))): both inputs are cast to the
   # output type variable U before the multiply; C itself is used uncast.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 attr_name: cast
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 attr_name: cast
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `matmul_unsigned` (cpp_class_name MatmulUnsignedOp):
 # C[d0, d1] = C[d0, d1] + cast_unsigned(U, A[d0, d2]) * cast_unsigned(U, B[d2, d1]),
 # with d2 as the reduction dimension. Unlike `matmul`, there is no `cast`
 # attribute: the cast is hard-wired via fn_name cast_unsigned.
 metadata: !LinalgOpMetadata
   name: matmul_unsigned
   cpp_class_name: MatmulUnsignedOp
   doc: |-
     Performs an unsigned matrix multiplication of two 2D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operands: A is (s0, s1), B is (s1, s2), C is (s0, s2) in terms of the
   # shape symbols; A, B and C use type variables T1, T2 and U respectively.
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   # Indexing maps in operand order: A -> (d0, d2), B -> (d2, d1),
   # C -> (d0, d1).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # Iterator kinds for d0, d1, d2 in that order; only d2 is a reduction.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # C = add(C, mul(cast_unsigned(U, A), cast_unsigned(U, B))): both inputs
   # are cast to U with the fixed cast_unsigned function; C is used uncast.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_unsigned
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_unsigned
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `quantized_matmul` (C++ class QuantizedMatmulOp): a 2D matmul in
 # which the scalar zero points AZp/BZp are subtracted from A/B before the
 # multiply. No `implements` list is declared for this op.
 metadata: !LinalgOpMetadata
   name: quantized_matmul
   cpp_class_name: QuantizedMatmulOp
   doc: |-
     Performs a matrix multiplication of two 2D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. The quantized variant
     includes zero-point adjustments for the left and right operands of the
     matmul.
 structured_op: !LinalgStructuredOpConfig
   # Tensor shapes over symbols: A is (s0, s1), B is (s1, s2), C is (s0, s2).
   # AZp and BZp are I32 scalars and therefore carry no shape_map.
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
   - !LinalgOperandDefConfig
     name: AZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: BZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Five maps for five operands, presumably in args order (A, B, AZp, BZp, C);
     # the two empty-result maps would then belong to the scalar zero points.
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # d0 and d1 are parallel; d2 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Scalar body:
   #   C = add(C, mul(sub(cast_signed(U, A), cast_signed(U, AZp)),
   #                  sub(cast_signed(U, B), cast_signed(U, BZp))))
   # Every operand is cast to U before the subtraction.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: A
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: AZp
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: B
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: BZp
 --- !LinalgOpConfig
 # Named op `matmul_transpose_a` (C++ class MatmulTransposeAOp): a 2D matmul
 # where A is indexed with the reduction dimension first, i.e. A[d2, d0].
 metadata: !LinalgOpMetadata
   name: matmul_transpose_a
   cpp_class_name: MatmulTransposeAOp
   doc: |-
     Performs a matrix multiplication of two 2D inputs with lhs operand
     transposed.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1), B is (s0, s2), C is (s2, s1).
   # NOTE(review): pairing each shape_map with its indexing map, A implies d0
   # ranges over s1 while C implies d0 ranges over s2 (d1 is swapped likewise).
   # The symbol naming looks inconsistent; confirm against core_named_ops.py.
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
   # `cast` is a type-function attribute defaulting to cast_signed; the
   # assignment below refers to it through `attr_name: cast`.
   - !LinalgOperandDefConfig
     name: cast
     kind: type_fn_attr
     default_fn: cast_signed
   indexing_maps: !LinalgIndexingMapsConfig
     # Three maps for the three tensor operands, presumably in order (A, B, C):
     # A is read at (d2, d0), B at (d2, d1) and C is accessed at (d0, d1).
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d0)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # d0 and d1 are parallel; d2 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast(U, A), cast(U, B))), where `cast` is the
   # function selected by the `cast` attribute.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 attr_name: cast
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 attr_name: cast
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `matmul_transpose_b` (C++ class MatmulTransposeBOp): a 2D matmul
 # where B is indexed with the reduction dimension last, i.e. B[d1, d2].
 metadata: !LinalgOpMetadata
   name: matmul_transpose_b
   cpp_class_name: MatmulTransposeBOp
   doc: |-
     Performs a matrix multiplication of two 2D inputs with rhs operand
     transposed.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1), B is (s2, s1), C is (s0, s2).
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   # `cast` is a type-function attribute defaulting to cast_signed; the
   # assignment below refers to it through `attr_name: cast`.
   - !LinalgOperandDefConfig
     name: cast
     kind: type_fn_attr
     default_fn: cast_signed
   indexing_maps: !LinalgIndexingMapsConfig
     # Three maps for the three tensor operands, presumably in order (A, B, C):
     # A is read at (d0, d2), B at (d1, d2) and C is accessed at (d0, d1).
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d1, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # d0 and d1 are parallel; d2 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast(U, A), cast(U, B))), where `cast` is the
   # function selected by the `cast` attribute.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 attr_name: cast
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 attr_name: cast
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `mmt4d` (C++ class Mmt4DOp): a contraction over six loop
 # dimensions with 4D lhs, rhs and accumulator operands.
 metadata: !LinalgOpMetadata
   name: mmt4d
   cpp_class_name: Mmt4DOp
   doc: |-
     Performs a matrix-matrix-transpose multiplication of two 4D inputs.

     Differences from linalg.matmul:
     * The right hand side is transposed, whence the 't' in 'mmt'.
     * The input and output tensors have a 4D shape instead of a 2D shape. They
       are interpreted as 2D matrices with one level of 2D tile subdivision,
       whence the 2+2=4 dimensions. The inner tile dimensions are identified with
       '0' suffixes below, for instance the LHS matrix shape (M, K, M0, K0) reads
       as: MxK tiles, each of shape M0xK0.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols s0..s5: lhs is (s0, s1, s2, s3), rhs is
   # (s4, s1, s5, s3) and accum is (s0, s4, s2, s5). The operands use the
   # dedicated type variables LhsType / RhsType / AccumType.
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: LhsType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2, s3)>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: RhsType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s5, s3)>
   - !LinalgOperandDefConfig
     name: accum
     kind: output_tensor
     type_var: AccumType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s4, s2, s5)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (lhs, rhs, accum). Each entry is a
     # plain scalar wrapped over two lines: lhs is read at (d0, d2, d3, d5),
     # rhs at (d1, d2, d4, d5) and accum is accessed at (d0, d1, d3, d4).
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d3,
       d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d2, d4,
       d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d3,
       d4)>
   # d2 and d5 are reductions (they do not appear in the accum map); d0, d1,
   # d3 and d4 are parallel.
   iterator_types:
   - parallel
   - parallel
   - reduction
   - parallel
   - parallel
   - reduction
   # Scalar body:
   #   accum = add(accum, mul(cast_signed(AccumType, lhs),
   #                          cast_signed(AccumType, rhs)))
   assignments:
   - !ScalarAssign
     arg: accum
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: accum
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: AccumType
                 operands:
                 - !ScalarExpression
                   scalar_arg: lhs
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: AccumType
                 operands:
                 - !ScalarExpression
                   scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `batch_mmt4d` (C++ class BatchMmt4DOp): a contraction over seven
 # loop dimensions with 5D lhs, rhs and accumulator operands, all of which share
 # their leading dimension (s0 / d0).
 metadata: !LinalgOpMetadata
   name: batch_mmt4d
   cpp_class_name: BatchMmt4DOp
   doc: |-
     Performs a batched matrix-matrix-transpose multiplication of two
     batched-4D (5D) inputs.

     Besides the outermost batch dimension, which has the same semantics as in
     linalg.batch_matmul, the differences from linalg.batch_matmul in the
     non-batch dimensions are the same as linalg.mmt4d vs. linalg.matmul. See the
     description of linalg.mmt4d.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols s0..s6: lhs is (s0, s1, s2, s3, s4), rhs is
   # (s0, s5, s2, s6, s4) and accum is (s0, s1, s5, s3, s6).
   args:
   - !LinalgOperandDefConfig
     name: lhs
     kind: input_tensor
     type_var: LhsType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s2, s3, s4)>
   - !LinalgOperandDefConfig
     name: rhs
     kind: input_tensor
     type_var: RhsType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s5, s2, s6, s4)>
   - !LinalgOperandDefConfig
     name: accum
     kind: output_tensor
     type_var: AccumType
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s5, s3, s6)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (lhs, rhs, accum). Each entry is a
     # plain scalar wrapped over two lines: lhs is read at (d0, d1, d3, d4, d6),
     # rhs at (d0, d2, d3, d5, d6) and accum is accessed at (d0, d1, d2, d4, d5).
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6] -> (d0,
       d1, d3, d4, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6] -> (d0,
       d2, d3, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6] -> (d0,
       d1, d2, d4, d5)>
   # d3 and d6 are reductions (they do not appear in the accum map); d0, d1,
   # d2, d4 and d5 are parallel.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - parallel
   - parallel
   - reduction
   # Scalar body:
   #   accum = add(accum, mul(cast_signed(AccumType, lhs),
   #                          cast_signed(AccumType, rhs)))
   assignments:
   - !ScalarAssign
     arg: accum
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: accum
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: AccumType
                 operands:
                 - !ScalarExpression
                   scalar_arg: lhs
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: AccumType
                 operands:
                 - !ScalarExpression
                   scalar_arg: rhs
 --- !LinalgOpConfig
 # Named op `batch_matmul` (C++ class BatchMatmulOp): a matmul over 3D
 # operands that all share their leading dimension (s0 / d0).
 metadata: !LinalgOpMetadata
   name: batch_matmul
   cpp_class_name: BatchMatmulOp
   doc: |-
     Performs a batched matrix multiplication of two 3D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1, s2), B is (s0, s2, s3) and
   # C is (s0, s1, s3).
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (A, B, C): A is read at
     # (d0, d1, d3), B at (d0, d3, d2) and C is accessed at (d0, d1, d2).
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
   # d0, d1 and d2 are parallel; d3 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast_signed(U, A), cast_signed(U, B))).
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `batch_matmul_transpose_a` (C++ class BatchMatmulTransposeAOp):
 # a batched matmul where A is indexed as A[d0, d3, d1], i.e. with the
 # reduction dimension ahead of the row dimension.
 metadata: !LinalgOpMetadata
   name: batch_matmul_transpose_a
   cpp_class_name: BatchMatmulTransposeAOp
   doc: |-
     Performs a batched matrix multiplication of two 3D inputs where lhs operand
     has its non-batch dimensions transposed.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1, s2), B is (s0, s1, s3) and
   # C is (s0, s2, s3).
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (A, B, C): A is read at
     # (d0, d3, d1), B at (d0, d3, d2) and C is accessed at (d0, d1, d2).
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d1)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
   # d0, d1 and d2 are parallel; d3 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast_signed(U, A), cast_signed(U, B))).
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `batch_matmul_transpose_b` (C++ class BatchMatmulTransposeBOp):
 # a batched matmul where B is indexed as B[d0, d2, d3], i.e. with the
 # reduction dimension last.
 metadata: !LinalgOpMetadata
   name: batch_matmul_transpose_b
   cpp_class_name: BatchMatmulTransposeBOp
   doc: |-
     Performs a batched matrix multiplication of two 3D inputs where rhs operand
     has its non-batch dimensions transposed.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1, s2), B is (s0, s3, s2) and
   # C is (s0, s1, s3).
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (A, B, C): A is read at
     # (d0, d1, d3), B at (d0, d2, d3) and C is accessed at (d0, d1, d2).
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d2, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
   # d0, d1 and d2 are parallel; d3 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast_signed(U, A), cast_signed(U, B))).
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `quantized_batch_matmul` (C++ class QuantizedBatchMatmulOp): a
 # batched matmul in which the scalar zero points AZp/BZp are subtracted from
 # A/B before the multiply. No `implements` list is declared for this op.
 metadata: !LinalgOpMetadata
   name: quantized_batch_matmul
   cpp_class_name: QuantizedBatchMatmulOp
   doc: |-
     Performs a batched matrix multiplication of two 3D inputs.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. The quantized variant
     includes zero-point adjustments for the left and right operands of the
     matmul.
 structured_op: !LinalgStructuredOpConfig
   # Tensor shapes over symbols: A is (s0, s1, s2), B is (s0, s2, s3) and
   # C is (s0, s1, s3). AZp and BZp are I32 scalars and carry no shape_map.
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
   - !LinalgOperandDefConfig
     name: AZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: BZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Five maps for five operands, presumably in args order (A, B, AZp, BZp, C);
     # the two empty-result maps would then belong to the scalar zero points.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
   # d0, d1 and d2 are parallel; d3 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # Scalar body:
   #   C = add(C, mul(sub(cast_signed(U, A), cast_signed(U, AZp)),
   #                  sub(cast_signed(U, B), cast_signed(U, BZp))))
   # Every operand is cast to U before the subtraction.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: A
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: AZp
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: B
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: BZp
 --- !LinalgOpConfig
 # Named op `batch_reduce_matmul` (C++ class BatchReduceMatmulOp): multiplies
 # 3D inputs and accumulates into a 2D output, so the leading dimension d0 is
 # reduced together with the contraction dimension d3.
 metadata: !LinalgOpMetadata
   name: batch_reduce_matmul
   cpp_class_name: BatchReduceMatmulOp
   doc: |-
     Performs a batch-reduce matrix multiplication of two 3D inputs.
     The partial multiplication results are reduced into a 2D output.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1, s2), B is (s0, s2, s3) and
   # C is (s1, s3) (the leading s0 dimension is absent from the output).
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s1, s3)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (A, B, C): A is read at
     # (d0, d1, d3), B at (d0, d3, d2) and C is accessed at (d1, d2).
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d1, d2)>
   # d0 and d3 are reductions (neither appears in the C map); d1 and d2 are
   # parallel.
   iterator_types:
   - reduction
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast_signed(U, A), cast_signed(U, B))).
   # Both inputs are cast to U *before* the multiply, as the op doc states, so
   # the product is formed from two operands of the accumulator type.
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `matvec` (C++ class MatvecOp): multiplies the 2D input A by the 1D
 # input y and accumulates into the 1D output x.
 metadata: !LinalgOpMetadata
   name: matvec
   cpp_class_name: MatvecOp
   doc: |-
     Performs a matrix-vector multiplication.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1), y is (s1), x is (s0).
   # Note that x is the output here and y is an input.
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: y
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1] -> (s1)>
   - !LinalgOperandDefConfig
     name: x
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1] -> (s0)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (A, y, x): A is read at (d0, d1),
     # y at (d1) and x is accessed at (d0).
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d0)>
   # d0 is parallel; d1 is the reduction dimension.
   iterator_types:
   - parallel
   - reduction
   # Scalar body: x = add(x, mul(cast_signed(U, A), cast_signed(U, y))).
   assignments:
   - !ScalarAssign
     arg: x
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: x
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: y
 --- !LinalgOpConfig
 # Named op `vecmat` (C++ class VecmatOp): multiplies the 1D input y by the 2D
 # input A and accumulates into the 1D output x.
 metadata: !LinalgOpMetadata
   name: vecmat
   cpp_class_name: VecmatOp
   doc: |-
     Performs a vector-matrix multiplication.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: y is (s0), A is (s0, s1), x is (s1).
   # Note that x is the output here and y is an input.
   args:
   - !LinalgOperandDefConfig
     name: y
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1] -> (s0)>
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: x
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1] -> (s1)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (y, A, x): y is read at (d1),
     # A at (d1, d0) and x is accessed at (d0).
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> (d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d1, d0)>
     - affine_map<(d0, d1)[s0, s1] -> (d0)>
   # d0 is parallel; d1 is the reduction dimension.
   iterator_types:
   - parallel
   - reduction
   # Scalar body: x = add(x, mul(cast_signed(U, y), cast_signed(U, A))).
   assignments:
   - !ScalarAssign
     arg: x
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: x
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: y
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
 --- !LinalgOpConfig
 # Named op `batch_matvec` (C++ class BatchMatvecOp): multiplies the 3D input
 # A by the 2D input B and accumulates into the 2D output C; all operands share
 # their leading dimension (s0 / d0).
 metadata: !LinalgOpMetadata
   name: batch_matvec
   cpp_class_name: BatchMatvecOp
   doc: |-
     Performs a batched matrix-vector multiplication.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1, s2), B is (s0, s2) and
   # C is (s0, s1).
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (A, B, C): A is read at
     # (d0, d1, d2), B at (d0, d2) and C is accessed at (d0, d1).
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # d0 and d1 are parallel; d2 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast_signed(U, A), cast_signed(U, B))).
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # Named op `batch_vecmat` (C++ class BatchVecmatOp): multiplies the 2D input
 # A (a batch of vectors) by the 3D input B (a batch of matrices) and
 # accumulates into the 2D output C; all operands share their leading
 # dimension (s0 / d0).
 metadata: !LinalgOpMetadata
   name: batch_vecmat
   cpp_class_name: BatchVecmatOp
   doc: |-
     Performs a batched vector-matrix multiplication.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Operand shapes over symbols: A is (s0, s1), B is (s0, s1, s2) and
   # C is (s0, s2).
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s1, s2)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps presumably follow operand order (A, B, C): A is read at (d0, d2),
     # B at (d0, d2, d1) and C is accessed at (d0, d1).
     static_indexing_maps:
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2, d1)>
     - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
   # d0 and d1 are parallel; d2 is the reduction dimension.
   iterator_types:
   - parallel
   - parallel
   - reduction
   # Scalar body: C = add(C, mul(cast_signed(U, A), cast_signed(U, B))).
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # dot: reduces two rank-1 operands into a rank-0 output,
 # i.e. C[] += A[d0] * B[d0].
 metadata: !LinalgOpMetadata
   name: dot
   cpp_class_name: DotOp
   doc: |-
     Performs a dot product of two vectors to a scalar result.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgContractionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # A and B both have shape (s0); C has the empty shape ().
   args:
   - !LinalgOperandDefConfig
     name: A
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0] -> (s0)>
   - !LinalgOperandDefConfig
     name: B
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0] -> (s0)>
   - !LinalgOperandDefConfig
     name: C
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0] -> ()>
   # One indexing map per operand, in args order (A, B, C).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0)[s0] -> (d0)>
     - affine_map<(d0)[s0] -> (d0)>
     - affine_map<(d0)[s0] -> ()>
   # The single iteration dimension d0 is a reduction.
   iterator_types:
   - reduction
   # C = add(C, mul(cast_signed<U>(A), cast_signed<U>(B)))
   assignments:
   - !ScalarAssign
     arg: C
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: C
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: A
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: B
 --- !LinalgOpConfig
 # conv_1d: channel-less 1-D convolution, O[d0] += I[d0 + d1] * K[d1].
 metadata: !LinalgOpMetadata
   name: conv_1d
   cpp_class_name: Conv1DOp
   doc: |-
     Performs 1-D convolution with no channels.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes: I is (s0 + s1), K is (s1), O is (s0).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1] -> (s0 + s1)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1] -> (s1)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1] -> (s0)>
   # One indexing map per operand, in args order (I, K, O).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> (d0 + d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d1)>
     - affine_map<(d0, d1)[s0, s1] -> (d0)>
   # d0 indexes the output; d1 walks the kernel and is reduced.
   iterator_types:
   - parallel
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_2d: channel-less 2-D convolution,
 # O[d0, d1] += I[d0 + d2, d1 + d3] * K[d2, d3].
 metadata: !LinalgOpMetadata
   name: conv_2d
   cpp_class_name: Conv2DOp
   doc: |-
     Performs 2-D convolution with no channels.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes: I is (s0 + s1, s2 + s3), K is (s1, s3), O is (s0, s2).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0 + s1, s2 + s3)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s1, s3)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2)>
   # One indexing map per operand, in args order (I, K, O).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 + d2, d1 + d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d2, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1)>
   # d0, d1 index the output; d2, d3 walk the kernel and are reduced.
   iterator_types:
   - parallel
   - parallel
   - reduction
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_3d: channel-less 3-D convolution,
 # O[d0, d1, d2] += I[d0 + d3, d1 + d4, d2 + d5] * K[d3, d4, d5].
 metadata: !LinalgOpMetadata
   name: conv_3d
   cpp_class_name: Conv3DOp
   doc: |-
     Performs 3-D convolution with no channels.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Shapes: I is (s0 + s1, s2 + s3, s4 + s5), K is (s1, s3, s5),
   # O is (s0, s2, s4).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0 + s1, s2 + s3, s4 + s5)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s1, s3, s5)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s2, s4)>
   # One indexing map per operand, in args order (I, K, O).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0 + d3, d1
       + d4, d2 + d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d3, d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # d0..d2 index the output; d3..d5 walk the kernel and are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
</full_update_placeholder_guard>
 --- !LinalgOpConfig
 # conv_1d_nwc_wcf: strided/dilated 1-D convolution,
 # O[d0, d1, d2] += I[d0, d1 * strides + d3 * dilations, d4] * K[d3, d4, d2].
 # NOTE: this file is autogenerated from core_named_ops.py; the Layout section
 # added to the doc below (for consistency with the other layout-suffixed conv
 # ops) should be mirrored there.
 metadata: !LinalgOpMetadata
   name: conv_1d_nwc_wcf
   cpp_class_name: Conv1DNwcWcfOp
   doc: |-
     Performs 1-D convolution.

     Layout:
       * Input: NWC.
       * Kernel: WCF.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbols s2 and s4 are bound to the `strides` and `dilations` index
   # attributes respectively (see their index_attr_map entries).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
       s5)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s6)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
     default_indices:
     - 1
   # One indexing map per tensor operand, in args order (I, K, O); the index
   # attributes have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
       + d3 * s4, d4)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d3, d4, d2)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2)>
   # d0..d2 index the output (N, W, F); d3 (kernel W) and d4 (C) are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_1d_ncw_fcw: strided/dilated 1-D convolution,
 # O[d0, d1, d2] += I[d0, d3, d2 * strides + d4 * dilations] * K[d1, d3, d4].
 metadata: !LinalgOpMetadata
   name: conv_1d_ncw_fcw
   cpp_class_name: Conv1DNcwFcwOp
   doc: |-
     Performs 1-D convolution.

     Layout:
       * Input: NCW.
       * Kernel: FCW.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbols s3 and s5 are bound to the `strides` and `dilations` index
   # attributes respectively (see their index_attr_map entries).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s2 * s3 + s4
       * s5)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s6, s1, s4)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s6, s2)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s5)>
     default_indices:
     - 1
   # One indexing map per tensor operand, in args order (I, K, O); the index
   # attributes have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d3, d2 *
       s3 + d4 * s5)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d1, d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2)>
   # d0..d2 index the output (N, F, W); d3 (C) and d4 (kernel W) are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_2d_nhwc_hwcf: strided/dilated 2-D convolution,
 # O[d0, d1, d2, d3] +=
 #   I[d0, d1 * strides[0] + d4 * dilations[0],
 #         d2 * strides[1] + d5 * dilations[1], d6] * K[d4, d5, d6, d3].
 metadata: !LinalgOpMetadata
   name: conv_2d_nhwc_hwcf
   cpp_class_name: Conv2DNhwcHwcfOp
   doc: |-
     Performs 2-D convolution.

     Layout:
       * Input: NHWC.
       * Kernel: HWCF.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbols (s2, s6) are bound to `strides` and (s4, s8) to `dilations`
   # (see their index_attr_map entries).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
       s7, s9, s10)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s10)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s2, s6)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s4, s8)>
     default_indices:
     - 1
     - 1
   # One indexing map per tensor operand, in args order (I, K, O); the index
   # attributes have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d4, d5, d6, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # d0..d3 index the output; d4, d5 (kernel window) and d6 (input channel)
   # are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_2d_nhwc_fhwc: strided/dilated 2-D convolution,
 # O[d0, d1, d2, d3] +=
 #   I[d0, d1 * strides[0] + d4 * dilations[0],
 #         d2 * strides[1] + d5 * dilations[1], d6] * K[d3, d4, d5, d6].
 metadata: !LinalgOpMetadata
   name: conv_2d_nhwc_fhwc
   cpp_class_name: Conv2DNhwcFhwcOp
   doc: |-
     Performs 2-D convolution.

     Layout:
       * Input: NHWC.
       * Kernel: FHWC.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbols (s2, s6) are bound to `strides` and (s4, s8) to `dilations`
   # (see their index_attr_map entries).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
       s3, s7, s9)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s10)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s2, s6)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s4, s8)>
     default_indices:
     - 1
     - 1
   # One indexing map per tensor operand, in args order (I, K, O); the index
   # attributes have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d3, d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # d0..d3 index the output; d4, d5 (kernel window) and d6 (input channel)
   # are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_2d_nhwc_hwcf_q: strided/dilated 2-D convolution with zero points,
 # O[d0, d1, d2, d3] +=
 #   (I[d0, d1 * strides[0] + d4 * dilations[0],
 #          d2 * strides[1] + d5 * dilations[1], d6] - IZp)
 #   * (K[d4, d5, d6, d3] - KZp).
 metadata: !LinalgOpMetadata
   name: conv_2d_nhwc_hwcf_q
   cpp_class_name: Conv2DNhwcHwcfQOp
   doc: |-
     Performs 2-D convolution with zero point offsets.

     Layout:
       * Input: NHWC.
       * Kernel: HWCF.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. This includes the zero
     point offsets common to quantized operations.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # IZp and KZp are I32 scalar operands (the input and kernel zero points).
   # Symbols (s2, s6) are bound to `strides` and (s4, s8) to `dilations`
   # (see their index_attr_map entries).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
       s7, s9, s10)>
   - !LinalgOperandDefConfig
     name: IZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s10)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s2, s6)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s4, s8)>
     default_indices:
     - 1
     - 1
   # One indexing map per tensor/scalar operand, in args order
   # (I, K, IZp, KZp, O); the two scalars map to (), and the index attributes
   # have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d4, d5, d6, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # d0..d3 index the output; d4, d5 (kernel window) and d6 (input channel)
   # are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(sub(cast_signed<U>(I), cast_signed<U>(IZp)),
   #                sub(cast_signed<U>(K), cast_signed<U>(KZp))))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
 --- !LinalgOpConfig
 # conv_2d_nhwc_fhwc_q: strided/dilated 2-D convolution with zero points,
 # O[d0, d1, d2, d3] +=
 #   (I[d0, d1 * strides[0] + d4 * dilations[0],
 #          d2 * strides[1] + d5 * dilations[1], d6] - IZp)
 #   * (K[d3, d4, d5, d6] - KZp).
 metadata: !LinalgOpMetadata
   name: conv_2d_nhwc_fhwc_q
   cpp_class_name: Conv2DNhwcFhwcQOp
   doc: |-
     Performs 2-D convolution with zero point offsets.

     Layout:
       * Input: NHWC.
       * Kernel: FHWC.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. This includes the zero
     point offsets common to quantized operations.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # IZp and KZp are I32 scalar operands (the input and kernel zero points).
   # Symbols (s2, s6) are bound to `strides` and (s4, s8) to `dilations`
   # (see their index_attr_map entries).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
       s3, s7, s9)>
   - !LinalgOperandDefConfig
     name: IZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s5, s10)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s2, s6)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s4, s8)>
     default_indices:
     - 1
     - 1
   # One indexing map per tensor/scalar operand, in args order
   # (I, K, IZp, KZp, O); the two scalars map to (), and the index attributes
   # have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d3, d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # d0..d3 index the output; d4, d5 (kernel window) and d6 (input channel)
   # are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(sub(cast_signed<U>(I), cast_signed<U>(IZp)),
   #                sub(cast_signed<U>(K), cast_signed<U>(KZp))))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
 --- !LinalgOpConfig
 # conv_2d_nchw_fchw: strided/dilated 2-D convolution,
 # O[d0, d1, d2, d3] +=
 #   I[d0, d4, d2 * strides[0] + d5 * dilations[0],
 #             d3 * strides[1] + d6 * dilations[1]] * K[d1, d4, d5, d6].
 metadata: !LinalgOpMetadata
   name: conv_2d_nchw_fchw
   cpp_class_name: Conv2DNchwFchwOp
   doc: |-
     Performs 2-D convolution.

     Layout:
       * Input: NCHW.
       * Kernel: FCHW.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Symbols (s3, s7) are bound to `strides` and (s5, s9) to `dilations`
   # (see their index_attr_map entries).
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
       s1, s4, s8)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
       s10, s2, s6)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s3, s7)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
       (s5, s9)>
     default_indices:
     - 1
     - 1
   # One indexing map per tensor operand, in args order (I, K, O); the index
   # attributes have no indexing map.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d4, d2 * s3 + d5 * s5, d3 * s7 + d6 * s9)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d1, d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
       s9, s10] -> (d0, d1, d2, d3)>
   # d0..d3 index the output; d4 (input channel) and d5, d6 (kernel window)
   # are reduced.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, mul(cast_signed<U>(I), cast_signed<U>(K)))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_2d_ngchw_fgchw: accumulates I * K into O over the reduction dimensions
 # d5, d6 and d7. The kernel's two leading dimensions are indexed by (d2, d1).
 metadata: !LinalgOpMetadata
   name: conv_2d_ngchw_fgchw
   cpp_class_name: Conv2DNgchwFgchwOp
   doc: |-
     Performs 2-D grouped convolution.

     Layout:
       * Input: NGCHW.
       * Kernel: FGCHW.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s11.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s11, s1, s2, s5, s9)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s0, s1, s11, s3, s7)>
   # strides supplies s4 and s8, which multiply the output-position terms
   # (d3, d4) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
       -> (s4, s8)>
     default_indices:
     - 1
     - 1
   # dilations supplies s6 and s10, which multiply the kernel-position terms
   # (d6, d7) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
       -> (s6, s10)>
     default_indices:
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d2, d1, d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dimension: d0-d4 are parallel, d5-d7 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_2d_ngchw_gfchw: accumulates I * K into O over the reduction dimensions
 # d5, d6 and d7. The kernel's two leading dimensions are indexed by (d1, d2).
 metadata: !LinalgOpMetadata
   name: conv_2d_ngchw_gfchw
   cpp_class_name: Conv2DNgchwGfchwOp
   doc: |-
     Performs 2-D grouped convolution.

     Layout:
       * Input: NGCHW.
       * Kernel: GFCHW.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s11.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s1, s11, s2, s5, s9)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s0, s1, s11, s3, s7)>
   # strides supplies s4 and s8, which multiply the output-position terms
   # (d3, d4) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
       -> (s4, s8)>
     default_indices:
     - 1
     - 1
   # dilations supplies s6 and s10, which multiply the kernel-position terms
   # (d6, d7) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
       -> (s6, s10)>
     default_indices:
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d1, d2, d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dimension: d0-d4 are parallel, d5-d7 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_2d_ngchw_gfchw_q: same indexing as conv_2d_ngchw_gfchw, but the scalar
 # zero points IZp and KZp are subtracted from I and K before the multiply.
 metadata: !LinalgOpMetadata
   name: conv_2d_ngchw_gfchw_q
   cpp_class_name: Conv2DNgchwGfchwQOp
   doc: |-
     Performs 2-D grouped convolution with zero-point offsets.

     Layout:
       * Input: NGCHW.
       * Kernel: GFCHW.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. This includes the zero
     point offsets common to quantized operations.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s11.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s1, s11, s2, s5, s9)>
   # Zero points are I32 scalars, so they carry no shape_map.
   - !LinalgOperandDefConfig
     name: IZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
       (s0, s1, s11, s3, s7)>
   # strides supplies s4 and s8, which multiply the output-position terms
   # (d3, d4) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
       -> (s4, s8)>
     default_indices:
     - 1
     - 1
   # dilations supplies s6 and s10, which multiply the kernel-position terms
   # (d6, d7) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
       -> (s6, s10)>
     default_indices:
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, IZp, KZp, O. The two scalar
     # operands use maps with an empty result list.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d1, d2, d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dimension: d0-d4 are parallel, d5-d7 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
   #       * (cast_signed<U>(K) - cast_signed<U>(KZp))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
 --- !LinalgOpConfig
 # conv_3d_ndhwc_dhwcf: accumulates I * K into O over the reduction dimensions
 # d5-d8. I's last dimension and K's fourth dimension are both indexed by d8.
 metadata: !LinalgOpMetadata
   name: conv_3d_ndhwc_dhwcf
   cpp_class_name: Conv3DNdhwcDhwcfOp
   doc: |-
     Performs 3-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s14.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
       s13)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s3, s7, s11, s13, s14)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1, s5, s9, s14)>
   # strides supplies s2, s6 and s10, which multiply the output-position terms
   # (d1, d2, d3) in I's indexing map; all default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s2, s6, s10)>
     default_indices:
     - 1
     - 1
     - 1
   # dilations supplies s4, s8 and s12, which multiply the kernel-position terms
   # (d5, d6, d7) in I's indexing map; all default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s4, s8, s12)>
     default_indices:
     - 1
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
       * s8, d3 * s10 + d7 * s12, d8)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dimension: d0-d4 are parallel, d5-d8 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # conv_3d_ndhwc_dhwcf_q: same indexing as conv_3d_ndhwc_dhwcf, but the scalar
 # zero points IZp and KZp are subtracted from I and K before the multiply.
 metadata: !LinalgOpMetadata
   name: conv_3d_ndhwc_dhwcf_q
   cpp_class_name: Conv3DNdhwcDhwcfQOp
   doc: |-
     Performs 3-D convolution with zero point offsets.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. This includes the zero
     point offsets common to quantized operations.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s14.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
       s13)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s3, s7, s11, s13, s14)>
   # Zero points are I32 scalars, so they carry no shape_map.
   - !LinalgOperandDefConfig
     name: IZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1, s5, s9, s14)>
   # strides supplies s2, s6 and s10, which multiply the output-position terms
   # (d1, d2, d3) in I's indexing map; all default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s2, s6, s10)>
     default_indices:
     - 1
     - 1
     - 1
   # dilations supplies s4, s8 and s12, which multiply the kernel-position terms
   # (d5, d6, d7) in I's indexing map; all default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s4, s8, s12)>
     default_indices:
     - 1
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, IZp, KZp, O. The two scalar
     # operands use maps with an empty result list.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
       * s8, d3 * s10 + d7 * s12, d8)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dimension: d0-d4 are parallel, d5-d8 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - reduction
   # O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
   #       * (cast_signed<U>(K) - cast_signed<U>(KZp))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
 --- !LinalgOpConfig
 # conv_3d_ncdhw_fcdhw: accumulates I * K into O over the reduction dimensions
 # d5-d8. The second dimension of both I and K is indexed by d8.
 metadata: !LinalgOpMetadata
   name: conv_3d_ncdhw_fcdhw
   cpp_class_name: Conv3DNcdhwFcdhwOp
   doc: |-
     Performs 3-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s14.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9, s10 * s11 + s12
       * s13)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s14, s1, s4, s8, s12)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s14, s2, s6, s10)>
   # strides supplies s3, s7 and s11, which multiply the output-position terms
   # (d1, d2, d3) in I's indexing map; all default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s3, s7, s11)>
     default_indices:
     - 1
     - 1
     - 1
   # dilations supplies s5, s9 and s13, which multiply the kernel-position terms
   # (d5, d6, d7) in I's indexing map; all default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s5, s9, s13)>
     default_indices:
     - 1
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d8, d1 * s3 + d5 * s5, d2 * s7
       + d6 * s9, d3 * s11 + d7 * s13)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d4, d8, d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d4, d1, d2, d3)>
   # One entry per loop dimension: d0-d4 are parallel, d5-d8 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # depthwise_conv_1d_nwc_wc: accumulates I * K into O over the single reduction
 # dimension d3. The last dimension of I, K and O is indexed by d2.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_1d_nwc_wc
   cpp_class_name: DepthwiseConv1DNwcWcOp
   doc: |-
     Performs depth-wise 1-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s5.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3, s5)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
   # strides supplies s2, which multiplies the output-position term d1 in I's
   # indexing map; defaults to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
     default_indices:
     - 1
   # dilations supplies s4, which multiplies the kernel-position term d3 in I's
   # indexing map; defaults to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
     default_indices:
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
       d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3, d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dimension: d0-d2 are parallel, d3 is the reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # depthwise_conv_1d_ncw_cw: accumulates I * K into O over the single reduction
 # dimension d3. d2 indexes the second dimension of I and O and the first of K.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_1d_ncw_cw
   cpp_class_name: DepthwiseConv1DNcwCwOp
   doc: |-
     Performs depth-wise 1-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s5.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s1, s4)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
   # strides supplies s3, which multiplies the output-position term d1 in I's
   # indexing map; defaults to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
     default_indices:
     - 1
   # dilations supplies s5, which multiplies the kernel-position term d3 in I's
   # indexing map; defaults to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
     default_indices:
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d1 * s3 + d3
       * s5)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d2, d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d1)>
   # One entry per loop dimension: d0-d2 are parallel, d3 is the reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # depthwise_conv_1d_nwc_wcm: accumulates I * K into O over the single reduction
 # dimension d4. K and O carry an extra trailing dimension d3 that I lacks.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_1d_nwc_wcm
   cpp_class_name: DepthwiseConv1DNwcWcmOp
   doc: |-
     Performs depth-wise 1-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s6.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
       s5)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s5, s6)>
   # strides supplies s2, which multiplies the output-position term d1 in I's
   # indexing map; defaults to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
     default_indices:
     - 1
   # dilations supplies s4, which multiplies the kernel-position term d4 in I's
   # indexing map; defaults to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
     default_indices:
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
       + d4 * s4, d2)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d4, d2, d3)>
     - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2,
       d3)>
   # One entry per loop dimension: d0-d3 are parallel, d4 is the reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # depthwise_conv_2d_nhwc_hwc: accumulates I * K into O over the reduction
 # dimensions d4 and d5. The last dimension of I, K and O is indexed by d3.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwc
   cpp_class_name: DepthwiseConv2DNhwcHwcOp
   doc: |-
     Performs depth-wise 2-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s9.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7, s9)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   # strides supplies s2 and s6, which multiply the output-position terms
   # (d1, d2) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
       s6)>
     default_indices:
     - 1
     - 1
   # dilations supplies s4 and s8, which multiply the kernel-position terms
   # (d4, d5) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
       s8)>
     default_indices:
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # One entry per loop dimension: d0-d3 are parallel, d4-d5 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 # depthwise_conv_2d_nchw_chw: accumulates I * K into O over the reduction
 # dimensions d4 and d5. d3 indexes the second dimension of I and O and the
 # first of K.
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nchw_chw
   cpp_class_name: DepthwiseConv2DNchwChwOp
   doc: |-
     Performs depth-wise 2-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   # Every shape_map and index_attr_map below shares one symbol list s0..s9.
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
       * s3 + s4 * s5, s6 * s7 + s8 * s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s1, s4, s8)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
       s6)>
   # strides supplies s3 and s7, which multiply the output-position terms
   # (d1, d2) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
       s7)>
     default_indices:
     - 1
     - 1
   # dilations supplies s5 and s9, which multiply the kernel-position terms
   # (d4, d5) in I's indexing map; both default to 1.
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
       s9)>
     default_indices:
     - 1
     - 1
   indexing_maps: !LinalgIndexingMapsConfig
     # Maps are listed in operand order: I, K, O.
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d3, d1 * s3 + d4 * s5, d2 * s7 + d5 * s9)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d3, d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d3, d1, d2)>
   # One entry per loop dimension: d0-d3 are parallel, d4-d5 are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwc_q
   cpp_class_name: DepthwiseConv2DNhwcHwcQOp
   doc: |-
     Performs depth-wise 2-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6 feed `strides`, s4/s8 feed
 # `dilations`, K has extents (s3, s7, s9) and O has extents (s0, s1, s5, s9).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s3, s7, s9)>
   # Scalar zero points subtracted from I and K respectively (see assignments).
   - !LinalgOperandDefConfig
     name: IZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1, s5, s9)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s2, s6)>
     default_indices: [1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s4, s8)>
     default_indices: [1, 1]
   # One indexing map per operand, in the order I, K, IZp, KZp, O; the two
   # scalars use the empty result list `()`.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # d0-d3 are parallel; d4 and d5 (the kernel window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, reduction, reduction]
   # O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
   #       * (cast_signed<U>(K) - cast_signed<U>(KZp))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             # Left factor: I - IZp, both promoted to U first.
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
             # Right factor: K - KZp, both promoted to U first.
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwcm
   cpp_class_name: DepthwiseConv2DNhwcHwcmOp
   doc: |-
     Performs depth-wise 2-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6 feed `strides`, s4/s8 feed
 # `dilations`, K has extents (s3, s7, s9, s10) and O has extents
 # (s0, s1, s5, s9, s10).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s3, s7, s9, s10)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s0, s1, s5, s9, s10)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s2, s6)>
     default_indices: [1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s4, s8)>
     default_indices: [1, 1]
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (d5, d6, d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (d0, d1, d2, d3, d4)>
   # d0-d4 are parallel; d5 and d6 (the kernel window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, parallel, reduction, reduction]
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             # Left factor: input element promoted to the accumulator type.
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             # Right factor: kernel element promoted to the accumulator type.
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_nhwc_hwcm_q
   cpp_class_name: DepthwiseConv2DNhwcHwcmQOp
   doc: |-
     Performs depth-wise 2-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6 feed `strides`, s4/s8 feed
 # `dilations`, K has extents (s3, s7, s9, s10) and O has extents
 # (s0, s1, s5, s9, s10).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s3, s7, s9, s10)>
   # Scalar zero points subtracted from I and K respectively (see assignments).
   - !LinalgOperandDefConfig
     name: IZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: KZp
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s0, s1, s5, s9, s10)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s2, s6)>
     default_indices: [1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (s4, s8)>
     default_indices: [1, 1]
   # One indexing map per operand, in the order I, K, IZp, KZp, O; the two
   # scalars use the empty result list `()`.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (d5, d6, d3, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> ()>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]
       -> (d0, d1, d2, d3, d4)>
   # d0-d4 are parallel; d5 and d6 (the kernel window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, parallel, reduction, reduction]
   # O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
   #       * (cast_signed<U>(K) - cast_signed<U>(KZp))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             # Left factor: I - IZp, both promoted to U first.
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: I
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: IZp
             # Right factor: K - KZp, both promoted to U first.
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: sub
                 operands:
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: K
                 - !ScalarExpression
                   scalar_fn:
                     kind: type
                     fn_name: cast_signed
                     type_var: U
                     operands:
                     - !ScalarExpression
                       scalar_arg: KZp
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: depthwise_conv_3d_ndhwc_dhwc
   cpp_class_name: DepthwiseConv3DNdhwcDhwcOp
   doc: |-
     Performs depth-wise 3-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6/s10 feed `strides`, s4/s8/s12
 # feed `dilations`, K has extents (s3, s7, s11, s13) and s13 is the trailing
 # extent shared by I, K and O.
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s3, s7, s11, s13)>
   # O is indexed with five results (d0, d1, d2, d3, d7) below, so its shape
   # must list five extents; the trailing s13 was previously missing.
   # NOTE: this file is generated from core_named_ops.py; mirror the rank fix
   # there so that regeneration keeps it.
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1, s5, s9, s13)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s2, s6, s10)>
     default_indices:
     - 1
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s4, s8, s12)>
     default_indices:
     - 1
     - 1
     - 1
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3
       * s10 + d6 * s12, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d4, d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d7)>
   # d0-d3 and d7 are parallel; d4-d6 (the kernel window) are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - parallel
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: depthwise_conv_3d_ncdhw_cdhw
   cpp_class_name: DepthwiseConv3DNcdhwCdhwOp
   doc: |-
     Performs depth-wise 3-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output. Multiplier is set to 1
     which is a special case for most depthwise convolutions.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s3/s7/s11 feed `strides`, s5/s9/s13
 # feed `dilations`, K has extents (s1, s4, s8, s12) and O has extents
 # (s0, s1, s2, s6, s10).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9, s10 * s11 + s12 * s13)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (s1, s4, s8, s12)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (s0, s1, s2, s6, s10)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (s3, s7, s11)>
     default_indices: [1, 1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (s5, s9, s13)>
     default_indices: [1, 1, 1]
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (d0, d7, d1 * s3 + d4 * s5, d2 * s7 + d5 * s9, d3 * s11 + d6 * s13)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (d7, d4, d5, d6)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13]
       -> (d0, d7, d1, d2, d3)>
   # d0-d3 and d7 are parallel; d4-d6 (the kernel window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, reduction, reduction, reduction, parallel]
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             # Left factor: input element promoted to the accumulator type.
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             # Right factor: kernel element promoted to the accumulator type.
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: depthwise_conv_3d_ndhwc_dhwcm
   cpp_class_name: DepthwiseConv3DNdhwcDhwcmOp
   doc: |-
     Performs depth-wise 3-D convolution.

     Numeric casting is performed on the operands to the inner multiply, promoting
     them to the same data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6/s10 feed `strides`, s4/s8/s12
 # feed `dilations`, K has extents (s3, s7, s11, s13, s14); s13 is the trailing
 # extent of I and s14 the trailing extent of K.
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
       s13)>
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s3, s7, s11, s13, s14)>
   # O is indexed with six results (d0, d1, d2, d3, d8, d4) below, so its shape
   # must list six extents; s13 (paired with d8) was previously missing.
   # NOTE: this file is generated from core_named_ops.py; mirror the rank fix
   # there so that regeneration keeps it.
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13, s14] -> (s0, s1, s5, s9, s13, s14)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s2, s6, s10)>
     default_indices:
     - 1
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13, s14] -> (s4, s8, s12)>
     default_indices:
     - 1
     - 1
     - 1
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
       * s8, d3 * s10 + d7 * s12, d8)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
       s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d8, d4)>
   # d0-d4 and d8 are parallel; d5-d7 (the kernel window) are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   - parallel
   # O = O + cast_signed<U>(I) * cast_signed<U>(K)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: mul
             operands:
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: I
             - !ScalarExpression
               scalar_fn:
                 kind: type
                 fn_name: cast_signed
                 type_var: U
                 operands:
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_sum
   cpp_class_name: PoolingNhwcSumOp
   doc: |-
     Performs sum pooling.

     Layout:
       * Input: NHWC.
       * Kernel: HW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6 feed `strides`, s4/s8 feed
 # `dilations`, K has extents (s3, s7) and O has extents (s0, s1, s5, s9).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   # K is declared and indexed, but its elements are not read by the assignment.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1, s5, s9)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s2, s6)>
     default_indices: [1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s4, s8)>
     default_indices: [1, 1]
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # d0-d3 are parallel; d4 and d5 (the pooling window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, reduction, reduction]
   # O = O + cast_signed<U>(I)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         # Input element promoted to the accumulator type.
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: pooling_nchw_sum
   cpp_class_name: PoolingNchwSumOp
   doc: |-
     Performs sum pooling.

     Layout:
       * Input: NCHW.
       * Kernel: HW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s3/s7 feed `strides`, s5/s9 feed
 # `dilations`, K has extents (s4, s8) and O has extents (s0, s1, s2, s6).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9)>
   # K is declared and indexed, but its elements are not read by the assignment.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s4, s8)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1, s2, s6)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s3, s7)>
     default_indices: [1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s5, s9)>
     default_indices: [1, 1]
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2 * s3 + d4 * s5, d3 * s7 + d5 * s9)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # d0-d3 are parallel; d4 and d5 (the pooling window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, reduction, reduction]
   # O = O + cast_signed<U>(I)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         # Input element promoted to the accumulator type.
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_max
   cpp_class_name: PoolingNhwcMaxOp
   doc: |-
     Performs max pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6 feed `strides`, s4/s8 feed
 # `dilations`, K has extents (s3, s7) and O has extents (s0, s1, s5, s9).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   # K is declared and indexed, but its elements are not read by the assignment.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1, s5, s9)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s2, s6)>
     default_indices: [1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s4, s8)>
     default_indices: [1, 1]
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # d0-d3 are parallel; d4 and d5 (the pooling window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, reduction, reduction]
   # O = max_signed(O, cast_signed<U>(I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         # Input element promoted to the accumulator type.
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_max_unsigned
   cpp_class_name: PoolingNhwcMaxUnsignedOp
   doc: |-
     Performs unsigned max pooling.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 # Symbol key (read off the maps below): s2/s6 feed `strides`, s4/s8 feed
 # `dilations`, K has extents (s3, s7) and O has extents (s0, s1, s5, s9).
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   # K is declared and indexed, but its elements are not read by the assignment.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s0, s1, s5, s9)>
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s2, s6)>
     default_indices: [1, 1]
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (s4, s8)>
     default_indices: [1, 1]
   # One indexing map per tensor operand, in the order I, K, O.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # d0-d3 are parallel; d4 and d5 (the pooling window) are reductions.
   iterator_types: [parallel, parallel, parallel, parallel, reduction, reduction]
   # O = max_unsigned(O, cast_unsigned<U>(I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_unsigned
         operands:
         - !ScalarExpression
           scalar_arg: O
         # Input element promoted to the accumulator type with an unsigned cast.
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_unsigned
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nchw_max: max-reduces a 2-D window of I into O.
 # Symbol roles, as bound by the maps below: s0, s1 = leading dims shared by
 # I and O; s2, s6 = O window-dim sizes; s3, s7 = strides; s4, s8 = K sizes;
 # s5, s9 = dilations.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_nchw_max
   cpp_class_name: PoolingNchwMaxOp
   doc: |-
     Performs max pooling.

     Layout:
       * Input: NCHW.
       * Kernel: HW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: each of the two trailing dims is sized
   # (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
       * s3 + s4 * s5, s6 * s7 + s8 * s9)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
       s6)>
   # Index attributes default to 1 per window dim.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
       s7)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
       s9)>
     default_indices:
     - 1
     - 1
   # Maps for I, K, O in that order. d0-d3 index O; d4, d5 walk the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2 * s3 + d4 * s5, d3 * s7 + d5 * s9)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # One entry per loop dim d0..d5: the two window dims are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = max_signed(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nhwc_min: min-reduces a 2-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s9 = first/last dims
 # shared by I and O; s1, s5 = O window-dim sizes; s2, s6 = strides;
 # s3, s7 = K sizes; s4, s8 = dilations.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_min
   cpp_class_name: PoolingNhwcMinOp
   doc: |-
     Performs min pooling.

     Layout:
       * Input: NHWC.
       * Kernel: HW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: each of the two middle dims is sized
   # (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   # Index attributes default to 1 per window dim.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
       s6)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
       s8)>
     default_indices:
     - 1
     - 1
   # Maps for I, K, O in that order. d0-d3 index O; d4, d5 walk the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # One entry per loop dim d0..d5: the two window dims are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = min_signed(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: min_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nhwc_min_unsigned: unsigned-min-reduces a 2-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s9 = first/last dims
 # shared by I and O; s1, s5 = O window-dim sizes; s2, s6 = strides;
 # s3, s7 = K sizes; s4, s8 = dilations.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_nhwc_min_unsigned
   cpp_class_name: PoolingNhwcMinUnsignedOp
   doc: |-
     Performs unsigned min pooling.

     Layout:
       * Input: NHWC.
       * Kernel: HW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: each of the two middle dims is sized
   # (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
       s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
       s9)>
   # Index attributes default to 1 per window dim.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
       s6)>
     default_indices:
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
       s8)>
     default_indices:
     - 1
     - 1
   # Maps for I, K, O in that order. d0-d3 index O; d4, d5 walk the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d4, d5)>
     - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
       -> (d0, d1, d2, d3)>
   # One entry per loop dim d0..d5: the two window dims are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   # O = min_unsigned(O, cast_unsigned(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: min_unsigned
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_unsigned
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nwc_sum: sum-reduces a 1-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s5 = first/last dims
 # shared by I and O; s1 = O window-dim size; s2 = stride; s3 = K size;
 # s4 = dilation.
 metadata: !LinalgOpMetadata
   name: pooling_nwc_sum
   cpp_class_name: PoolingNwcSumOp
   doc: |-
     Performs sum pooling.

     Layout:
       * Input: NWC.
       * Kernel: W.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: the middle dim is sized (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
   # Index attributes default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
     default_indices:
     - 1
   # Maps for I, K, O in that order. d0-d2 index O; d3 walks the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
       d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dim d0..d3: the window dim is a reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = add(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_ncw_sum: sum-reduces a 1-D window of I into O.
 # Symbol roles, as bound by the maps below: s0, s1 = leading dims shared by
 # I and O; s2 = O window-dim size; s3 = stride; s4 = K size; s5 = dilation.
 metadata: !LinalgOpMetadata
   name: pooling_ncw_sum
   cpp_class_name: PoolingNcwSumOp
   doc: |-
     Performs sum pooling.

     Layout:
       * Input: NCW.
       * Kernel: W.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: the last dim is sized (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
   # Index attributes default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
     default_indices:
     - 1
   # Maps for I, K, O in that order. d0-d2 index O; d3 walks the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3
       * s5)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dim d0..d3: the window dim is a reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = add(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nwc_max: max-reduces a 1-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s5 = first/last dims
 # shared by I and O; s1 = O window-dim size; s2 = stride; s3 = K size;
 # s4 = dilation.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_nwc_max
   cpp_class_name: PoolingNwcMaxOp
   doc: |-
     Performs max pooling.

     Layout:
       * Input: NWC.
       * Kernel: W.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: the middle dim is sized (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
   # Index attributes default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
     default_indices:
     - 1
   # Maps for I, K, O in that order. d0-d2 index O; d3 walks the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
       d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dim d0..d3: the window dim is a reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = max_signed(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nwc_max_unsigned: unsigned-max-reduces a 1-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s5 = first/last dims
 # shared by I and O; s1 = O window-dim size; s2 = stride; s3 = K size;
 # s4 = dilation.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_nwc_max_unsigned
   cpp_class_name: PoolingNwcMaxUnsignedOp
   doc: |-
     Performs unsigned max pooling.

     Layout:
       * Input: NWC.
       * Kernel: W.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: the middle dim is sized (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
   # Index attributes default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
     default_indices:
     - 1
   # Maps for I, K, O in that order. d0-d2 index O; d3 walks the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
       d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dim d0..d3: the window dim is a reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = max_unsigned(O, cast_unsigned(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_unsigned
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_unsigned
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_ncw_max: max-reduces a 1-D window of I into O.
 # Symbol roles, as bound by the maps below: s0, s1 = leading dims shared by
 # I and O; s2 = O window-dim size; s3 = stride; s4 = K size; s5 = dilation.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_ncw_max
   cpp_class_name: PoolingNcwMaxOp
   doc: |-
     Performs max pooling.

     Layout:
       * Input: NCW.
       * Kernel: W.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: the last dim is sized (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
   # Index attributes default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
     default_indices:
     - 1
   # Maps for I, K, O in that order. d0-d2 index O; d3 walks the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3
       * s5)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dim d0..d3: the window dim is a reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = max_signed(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nwc_min: min-reduces a 1-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s5 = first/last dims
 # shared by I and O; s1 = O window-dim size; s2 = stride; s3 = K size;
 # s4 = dilation.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_nwc_min
   cpp_class_name: PoolingNwcMinOp
   doc: |-
     Performs min pooling.

     Layout:
       * Input: NWC.
       * Kernel: W.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: the middle dim is sized (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
   # Index attributes default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
     default_indices:
     - 1
   # Maps for I, K, O in that order. d0-d2 index O; d3 walks the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
       d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dim d0..d3: the window dim is a reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = min_signed(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: min_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_nwc_min_unsigned: unsigned-min-reduces a 1-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s5 = first/last dims
 # shared by I and O; s1 = O window-dim size; s2 = stride; s3 = K size;
 # s4 = dilation.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_nwc_min_unsigned
   cpp_class_name: PoolingNwcMinUnsignedOp
   doc: |-
     Performs unsigned min pooling.

     Layout:
       * Input: NWC.
       * Kernel: W.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: the middle dim is sized (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
   # Index attributes default to 1.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
     default_indices:
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
     default_indices:
     - 1
   # Maps for I, K, O in that order. d0-d2 index O; d3 walks the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
       d2)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
     - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
   # One entry per loop dim d0..d3: the window dim is a reduction.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - reduction
   # O = min_unsigned(O, cast_unsigned(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: min_unsigned
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_unsigned
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_ndhwc_sum: sum-reduces a 3-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s13 = first/last dims
 # shared by I and O; s1, s5, s9 = O window-dim sizes; s2, s6, s10 = strides;
 # s3, s7, s11 = K sizes; s4, s8, s12 = dilations.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_ndhwc_sum
   cpp_class_name: PoolingNdhwcSumOp
   doc: |-
     Performs 3D sum pooling.

     Layout:
       * Input: NDHWC.
       * Kernel: DHW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: each of the three middle dims is sized
   # (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s3, s7, s11)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1, s5, s9, s13)>
   # Index attributes default to 1 per window dim.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s2, s6, s10)>
     default_indices:
     - 1
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s4, s8, s12)>
     default_indices:
     - 1
     - 1
     - 1
   # Maps for I, K, O in that order. d0-d4 index O; d5-d7 walk the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
       * s10 + d7 * s12, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dim d0..d7: the three window dims are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = add(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: add
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_ndhwc_max: max-reduces a 3-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s13 = first/last dims
 # shared by I and O; s1, s5, s9 = O window-dim sizes; s2, s6, s10 = strides;
 # s3, s7, s11 = K sizes; s4, s8, s12 = dilations.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_ndhwc_max
   cpp_class_name: PoolingNdhwcMaxOp
   doc: |-
     Performs 3D max pooling.

     Layout:
       * Input: NDHWC.
       * Kernel: DHW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: each of the three middle dims is sized
   # (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s3, s7, s11)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1, s5, s9, s13)>
   # Index attributes default to 1 per window dim.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s2, s6, s10)>
     default_indices:
     - 1
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s4, s8, s12)>
     default_indices:
     - 1
     - 1
     - 1
   # Maps for I, K, O in that order. d0-d4 index O; d5-d7 walk the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
       * s10 + d7 * s12, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dim d0..d7: the three window dims are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = max_signed(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: max_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # pooling_ndhwc_min: min-reduces a 3-D window of I into O.
 # Symbol roles, as bound by the maps below: s0 and s13 = first/last dims
 # shared by I and O; s1, s5, s9 = O window-dim sizes; s2, s6, s10 = strides;
 # s3, s7, s11 = K sizes; s4, s8, s12 = dilations.
 # NOTE(review): this file is generated from core_named_ops.py; mirror the
 # Layout text added to `doc` there.
 metadata: !LinalgOpMetadata
   name: pooling_ndhwc_min
   cpp_class_name: PoolingNdhwcMinOp
   doc: |-
     Performs 3D min pooling.

     Layout:
       * Input: NDHWC.
       * Kernel: DHW.

     Numeric casting is performed on the input operand, promoting it to the same
     data type as the accumulator/output.
   implements:
   - LinalgConvolutionOpInterface
 structured_op: !LinalgStructuredOpConfig
   args:
   # Input: each of the three middle dims is sized
   # (O size * stride + K size * dilation).
   - !LinalgOperandDefConfig
     name: I
     kind: input_tensor
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
   # Window operand: the assignment below never reads K, only its shape matters.
   - !LinalgOperandDefConfig
     name: K
     kind: input_tensor
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s3, s7, s11)>
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
       s13] -> (s0, s1, s5, s9, s13)>
   # Index attributes default to 1 per window dim.
   - !LinalgOperandDefConfig
     name: strides
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s2, s6, s10)>
     default_indices:
     - 1
     - 1
     - 1
   - !LinalgOperandDefConfig
     name: dilations
     kind: index_attr
     index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
       s12, s13] -> (s4, s8, s12)>
     default_indices:
     - 1
     - 1
     - 1
   # Maps for I, K, O in that order. d0-d4 index O; d5-d7 walk the window.
   # I is accessed at (output index * stride + window index * dilation).
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
       * s10 + d7 * s12, d4)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
     - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
       s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
   # One entry per loop dim d0..d7: the three window dims are reductions.
   iterator_types:
   - parallel
   - parallel
   - parallel
   - parallel
   - parallel
   - reduction
   - reduction
   - reduction
   # O = min_signed(O, cast_signed(U, I))
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: binary
         fn_name: min_signed
         operands:
         - !ScalarExpression
           scalar_arg: O
         - !ScalarExpression
           scalar_fn:
             kind: type
             fn_name: cast_signed
             type_var: U
             operands:
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
 # fill: writes the scalar operand `value`, cast to the output element type,
 # into O. Declares zero-dimensional maps and no iterators.
 metadata: !LinalgOpMetadata
   name: fill
   cpp_class_name: FillOp
   doc: |-
     Fills the output tensor with the given value.

     Works for arbitrary ranked output tensors since the operation performs scalar
     accesses only and is thus rank polymorphic. Numeric casting is performed on
     the value operand, promoting it to the same data type as the output.
   implements:
   - LinalgFillOpInterface
   defines:
   - hasCanonicalizer
 structured_op: !LinalgStructuredOpConfig
   args:
   # Scalar operand: it carries no shape_map.
   - !LinalgOperandDefConfig
     name: value
     kind: scalar
     type_var: T1
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: U
     shape_map: affine_map<() -> ()>
   # One map per operand (value, O); both are zero-dimensional.
   indexing_maps: !LinalgIndexingMapsConfig
     static_indexing_maps:
     - affine_map<() -> ()>
     - affine_map<() -> ()>
   # No loop dims are declared here.
   iterator_types: []
   # O = cast_signed(U, value)
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: type
         fn_name: cast_signed
         type_var: U
         operands:
         - !ScalarExpression
           scalar_arg: value
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: fill_rng_2d
   cpp_class_name: FillRng2DOp
   doc: |-
     Fills the output tensor with pseudo random numbers.

     The operation generates pseudo random numbers using a linear congruential
     generator. It provides no guarantees regarding the distribution of the
     generated random numbers. Instead of generating the random numbers
     sequentially, it instantiates one random number generator per data element
     and runs them in parallel. The seed operand and the indices of the data
     element seed the random number generation. The min and max operands limit
     the range of the generated random numbers.
 # Operand declarations, indexing maps, iterator types, and the scalar
 # computation for fill_rng_2d.
 structured_op: !LinalgStructuredOpConfig
   # Three scalar operands (min, max, seed) followed by the output tensor O.
   args:
   - !LinalgOperandDefConfig
     name: min
     kind: scalar
     type_var: F64
   - !LinalgOperandDefConfig
     name: max
     kind: scalar
     type_var: F64
   - !LinalgOperandDefConfig
     name: seed
     kind: scalar
     type_var: I32
   - !LinalgOperandDefConfig
     name: O
     kind: output_tensor
     type_var: T
     # O is rank 2; its two extents are given by the symbols s0 and s1.
     shape_map: affine_map<()[s0, s1] -> (s0, s1)>
   indexing_maps: !LinalgIndexingMapsConfig
     # Four maps: three `()` maps and one `(d0, d1)` map, lining up with the
     # three scalar operands and the rank-2 output O declared in args.
     static_indexing_maps:
     - affine_map<(d0, d1)[s0, s1] -> ()>
     - affine_map<(d0, d1)[s0, s1] -> ()>
     - affine_map<(d0, d1)[s0, s1] -> ()>
     - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
   # Both iteration dimensions are parallel.
   iterator_types:
   - parallel
   - parallel
   # A single assignment to O. Written out, the expression tree below is
   #   rand1 = (i32(index 0) + seed) * 1103515245 + 12345
   #   rand2 = (i32(index 1) + rand1) * 1103515245 + 12345
   #   O     = T((f64(2147483647) + f64(rand2))
   #             * ((max - min) * f64(2.3283063999999999E-10)) + min)
   # where i32(...), f64(...) and T(...) stand for cast_signed to that
   # type_var, and rand1/rand2 are names used only in these comments for the
   # two nested sub-expressions.
   # Arithmetic facts: 2147483647 = 2^31 - 1 and 2.3283064E-10 is
   # approximately 2^-32.
   # NOTE(review): presumably the offset makes the signed i32 value
   # non-negative and the scale factor maps it into the min/max range --
   # confirm against core_named_ops.py.
   assignments:
   - !ScalarAssign
     arg: O
     value: !ScalarExpression
       scalar_fn:
         kind: type
         fn_name: cast_signed
         type_var: T
         operands:
         # (offset_value * scale_factor) + min
         - !ScalarExpression
           scalar_fn:
             kind: binary
             fn_name: add
             operands:
             # offset_value * scale_factor
             - !ScalarExpression
               scalar_fn:
                 kind: binary
                 fn_name: mul
                 operands:
                 # offset_value = f64(2147483647) + f64(rand2)
                 - !ScalarExpression
                   scalar_fn:
                     kind: binary
                     fn_name: add
                     operands:
                     - !ScalarExpression
                       scalar_fn:
                         kind: type
                         fn_name: cast_signed
                         type_var: F64
                         operands:
                         - !ScalarExpression
                           scalar_const: '2147483647 : i64'
                     - !ScalarExpression
                       scalar_fn:
                         kind: type
                         fn_name: cast_signed
                         type_var: F64
                         operands:
                         # rand2 = (i32(index 1) + rand1) * 1103515245 + 12345
                         - !ScalarExpression
                           scalar_fn:
                             kind: binary
                             fn_name: add
                             operands:
                             - !ScalarExpression
                               scalar_fn:
                                 kind: binary
                                 fn_name: mul
                                 operands:
                                 - !ScalarExpression
                                   scalar_fn:
                                     kind: binary
                                     fn_name: add
                                     operands:
                                     - !ScalarExpression
                                       scalar_fn:
                                         kind: type
                                         fn_name: cast_signed
                                         type_var: I32
                                         operands:
                                         - !ScalarExpression
                                           scalar_index: 1
                                     # rand1 = (i32(index 0) + seed) * 1103515245 + 12345
                                     - !ScalarExpression
                                       scalar_fn:
                                         kind: binary
                                         fn_name: add
                                         operands:
                                         - !ScalarExpression
                                           scalar_fn:
                                             kind: binary
                                             fn_name: mul
                                             operands:
                                             - !ScalarExpression
                                               scalar_fn:
                                                 kind: binary
                                                 fn_name: add
                                                 operands:
                                                 - !ScalarExpression
                                                   scalar_fn:
                                                     kind: type
                                                     fn_name: cast_signed
                                                     type_var: I32
                                                     operands:
                                                     - !ScalarExpression
                                                       scalar_index: 0
                                                 - !ScalarExpression
                                                   scalar_arg: seed
                                             - !ScalarExpression
                                               scalar_fn:
                                                 kind: type
                                                 fn_name: cast_signed
                                                 type_var: I32
                                                 operands:
                                                 - !ScalarExpression
                                                   scalar_const: '1103515245 : i64'
                                         - !ScalarExpression
                                           scalar_fn:
                                             kind: type
                                             fn_name: cast_signed
                                             type_var: I32
                                             operands:
                                             - !ScalarExpression
                                               scalar_const: '12345 : i64'
                                 - !ScalarExpression
                                   scalar_fn:
                                     kind: type
                                     fn_name: cast_signed
                                     type_var: I32
                                     operands:
                                     - !ScalarExpression
                                       scalar_const: '1103515245 : i64'
                             - !ScalarExpression
                               scalar_fn:
                                 kind: type
                                 fn_name: cast_signed
                                 type_var: I32
                                 operands:
                                 - !ScalarExpression
                                   scalar_const: '12345 : i64'
                 # scale_factor = (max - min) * f64(2.3283063999999999E-10)
                 - !ScalarExpression
                   scalar_fn:
                     kind: binary
                     fn_name: mul
                     operands:
                     - !ScalarExpression
                       scalar_fn:
                         kind: binary
                         fn_name: sub
                         operands:
                         - !ScalarExpression
                           scalar_arg: max
                         - !ScalarExpression
                           scalar_arg: min
                     - !ScalarExpression
                       scalar_fn:
                         kind: type
                         fn_name: cast_signed
                         type_var: F64
                         operands:
                         - !ScalarExpression
                           scalar_const: '2.3283063999999999E-10 : f64'
             # Final "+ min" term of the outer add.
             - !ScalarExpression
               scalar_arg: min