// mlir/include/mlir/Dialect/Quant/QuantOps.td - llvm-project - Git at Google

 //===- QuantOps.td - Quantization operation definition -----*- tablegen -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This is the operation definition file for Quantization.
 //
 //===----------------------------------------------------------------------===//

 #ifndef DIALECT_QUANT_QUANT_OPS_
 #define DIALECT_QUANT_QUANT_OPS_

 include "mlir/Dialect/Quant/QuantOpsBase.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"

 //===----------------------------------------------------------------------===//
 // Base classes
 //===----------------------------------------------------------------------===//

 // Base class for the ops defined in this file.
 //   mnemonic: forwarded to Op as the op mnemonic (e.g. "qcast").
 //   traits:   forwarded to Op as the trait list. Defaults to the empty list
 //             so that an op without traits does not have to pass `[]`.
 class quant_Op<string mnemonic, list<OpTrait> traits = []> :
     Op<Quantization_Dialect, mnemonic, traits>;

 //===----------------------------------------------------------------------===//
 // Quantization casts
 //===----------------------------------------------------------------------===//
 // A QuantizeCast (qcast) represents a potential type shift from a quantizable
 // type to a quantized type.
 //
 // At runtime, a qcast will apply the transformation expressed by its
 // operand and result type. For flexibility during transformation, it is also
 // possible to have a qcast that performs no transformation (both its
 // operand and result type are quantizable).
 //
 // A qcast will typically originate from either:
 //   a) An expressed or implied constraint in the source dialect which signals
 //      that a certain level of quantization is possible or required.
 //   b) An inference made by a quantization algorithm indicating that a
 //      quantized representation may be acceptable.
 //
 // Especially early in transformation, it is common to have pairs of
 // qcast/dcast at points where a transition to a quantized type is
 // required. In addition, it is also common to have an identity qcast
 // (where the operand and result type are not quantized) at all points where
 // it is legal to use a quantized representation (but is not known to be
 // acceptable).
 def quant_QuantizeCastOp : quant_Op<"qcast", [NoSideEffect]> {
   // One operand, constrained by quant_RealValueType.
   let arguments = (ins
     quant_RealValueType:$arg
   );

   // One unnamed result under the same constraint.
   let results = (outs
     quant_RealValueType
   );
 }

 // A DequantizeCast op (dcast) represents the inverse of a qcast,
 // converting back from a quantized to quantizable (expressed) type.
 //
 // Like qcasts, a dcast is allowed to have both its operand and result
 // as non quantized types. This facilitates transformations and marks edges
 // where the computation must be carried out in the expressed type.
 //
 // Especially early in transformation, it is common to have dcasts on
 // all operands to ops that must operate with the expressed type (typically
 // math ops prior to lowering to target-specific, quantized kernels).
 def quant_DequantizeCastOp : quant_Op<"dcast", [NoSideEffect]> {
   // One operand, constrained by quant_RealValueType.
   let arguments = (ins
     quant_RealValueType:$arg
   );

   // One unnamed result under the same constraint.
   let results = (outs
     quant_RealValueType
   );
 }

 // A StorageCast (scast) represents a cast from or to a type based on the
 // storage type and a type based on a corresponding quantized type.
 //
 // This op exists to ensure type coherency between parts of the computation
 // which are operating directly on an underlying storage type and those which
 // operate on quantized values.
 //
 // Examples from storage to quantized type:
 //   i8 -> !quant<"uniform[i8:f32]{1.0}">
 //   tensor<4xi8> -> tensor<4x!quant<"uniform[i8:f32]{1.0}">>
 //   vector<4xi8> -> vector<4x!quant<"uniform[i8:f32]{1.0}">>
 def quant_StorageCastOp : quant_Op<"scast", [NoSideEffect]> {
   // This op provides a folder.
   let hasFolder = 1;

   // One operand and one unnamed result, both constrained by
   // quant_RealOrStorageValueType.
   let arguments = (ins
     quant_RealOrStorageValueType:$arg
   );
   let results = (outs
     quant_RealOrStorageValueType
   );
 }

 // A QuantizeRegion (region) represents a quantization unit which wraps
 // high-precision ops with quantization specifications for all the inputs
 // and outputs. Some quantization specifications can be undetermined and
 // derived from other ports by the target specification of the kernel.
 def quant_QuantizeRegionOp
     : quant_Op<"region", [NoSideEffect, IsolatedFromAbove,
                           SingleBlockImplicitTerminator<"ReturnOp">]> {
   let summary = [{
     The `region` operation wraps high-precision ops as a logical low-precision
     quantized kernel.
   }];

   // Variadic operands followed by three attributes: the type arrays for the
   // input and output specifications, and the name of the logical kernel.
   let arguments = (ins
     Variadic<AnyType>:$inputs,
     TypeArrayAttr:$input_specs,
     TypeArrayAttr:$output_specs,
     StrAttr:$logical_kernel
   );
   let results = (outs
     Variadic<AnyType>:$outputs
   );

   // The wrapped ops live in $body, which is constrained by SizedRegion<1>.
   let regions = (region
     SizedRegion<1>:$body
   );

   // Verification is delegated to verifyRegionOp(), which is defined outside
   // of this file.
   let verifier = [{ return verifyRegionOp(*this); }];
 }

 def quant_ReturnOp : quant_Op<"return", [Terminator]> {
   let summary = [{
     The `return` operation terminates a quantize region and returns values.
   }];

   // The returned values; each one must satisfy AnyTensor.
   let arguments = (ins
     Variadic<AnyTensor>:$results
   );
 }

 //===----------------------------------------------------------------------===//
 // Training integration and instrumentation ops
 //===----------------------------------------------------------------------===//

 def quant_ConstFakeQuant
     : quant_Op<"const_fake_quant", [SameOperandsAndResultType, NoSideEffect]> {
   let summary = [{
     Simulates the effect of uniform quantization with const range.
   }];

   let description = [{
     Given a const min, max, num_bits and narrow_range attribute, applies the
     same uniform quantization simulation as is done by the TensorFlow
     fake_quant_with_min_max_args op. See the fakeQuantAttrsToType() utility
     method and the quant-convert-simulated-quantization pass for further details.
   }];

   // One f32 tensor operand followed by the attributes that configure the
   // simulated quantization:
   //   min, max     - the const range.
   //   num_bits     - the bitwidth of the quantization; between 2 and 16,
   //                  inclusive.
   //   narrow_range - the quantization range starts from 1 if true and from 0
   //                  otherwise; defaults to false.
   //   is_signed    - the sign of the quantization; defaults to false.
   let arguments = (ins
     F32Tensor:$inputs,
     F32Attr:$min,
     F32Attr:$max,
     I64Attr:$num_bits,
     DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
     DefaultValuedAttr<BoolAttr, "false">:$is_signed
   );

   let results = (outs F32Tensor:$outputs);
 }

 def quant_ConstFakeQuantPerAxis
     : quant_Op<"const_fake_quant_per_axis",
                [SameOperandsAndResultType, NoSideEffect]> {
   let summary = [{
     Simulates the effect of per axis uniform quantization with const range.
   }];

   let description = [{
     Given a const min, max, num_bits and narrow_range attribute, applies the
     same per axis uniform quantization simulation as is done by the TensorFlow
     fake_quant_with_min_max_vars_per_channel op. See the fakeQuantAttrsToType()
     utility method and the quant-convert-simulated-quantization pass for further
     details.
   }];

   // One f32 tensor operand followed by the attributes that configure the
   // simulated quantization:
   //   min, max     - the const ranges, given as f32 arrays.
   //   axis         - the quantized dimension of the inputs tensor.
   //   num_bits     - the bitwidth of the quantization; between 2 and 16,
   //                  inclusive.
   //   narrow_range - the quantization range starts from 1 if true and from 0
   //                  otherwise; defaults to false.
   //   is_signed    - the sign of the quantization; defaults to false.
   let arguments = (ins
     F32Tensor:$inputs,
     F32ArrayAttr:$min,
     F32ArrayAttr:$max,
     I64Attr:$axis,
     I64Attr:$num_bits,
     DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
     DefaultValuedAttr<BoolAttr, "false">:$is_signed
   );

   let results = (outs F32Tensor:$outputs);
 }

 def quant_StatisticsRefOp : quant_Op<"stats_ref", [SameOperandsAndResultType]> {
   let summary = "Indicates that statistics are resolved by reference.";

   let description = [{
     This op acts as an identity that, when encountered at runtime, should result
     in statistics being collected about the value of its operand/result.
     Such statistics will be stored with the provided key, allowing this node
     to later be converted to a 'stats' op if statistics with that key have been
     encountered.
   }];

   // $arg is the value being observed; $statsKey is the key under which its
   // statistics are stored.
   let arguments = (ins
     quant_RealValueType:$arg,
     StrAttr:$statsKey
   );
   let results = (outs quant_RealValueType);
 }

 def quant_StatisticsOp : quant_Op<"stats", [SameOperandsAndResultType]> {
   let summary = "Identity op which associates statistics with the value.";

   let description = [{
     Associates statistics about the runtime ranges of values observed for
     evaluations of this node.

     Statistics about the entire type are reported in the `layerStats` attribute
     and those for each axis, in the (optional) `axisStats` attribute. The
     interpretation of each is determined by the last dimension of its shape.
     Currently, only dim=2 is supported, which is interpreted as [min, max].

     `layerStats` must be a rank 1 tensor: [2]
     `axisStats` must be a rank 2 tensor: [N, 2], where N=the slice size
       split by the `axis` dimension. For example:

     ```
     <?x?x3x2>, axis=3 => N=2
     <?x?x3x2>, axis=2 => N=6
     ```

     When `axisStats` is present, `axis` must also be present, `arg` must be a
     ranked tensor and `axis` must not exceed the rank of `arg`.
   }];

   let arguments = (ins
     quant_RealValueType:$arg,
     ElementsAttr:$layerStats,
     OptionalAttr<ElementsAttr>:$axisStats,
     OptionalAttr<I64Attr>:$axis);
   let results = (outs quant_RealValueType);

   let verifier = [{
     auto tensorArg = arg().getType().dyn_cast<TensorType>();
     if (!tensorArg) return emitOpError("arg needs to be tensor type.");

     // Verify layerStats attribute.
     {
       auto layerStatsType = layerStats().getType();
       if (!layerStatsType.getElementType().isa<FloatType>()) {
         return emitOpError(
             "layerStats must have a floating point element type");
       }
       if (layerStatsType.getRank() != 1 || layerStatsType.getDimSize(0) != 2) {
         return emitOpError("layerStats must have shape [2]");
       }
     }
     // Verify axisStats (optional) attribute.
     if (axisStats()) {
       if (!axis()) return emitOpError("axis must be specified for axisStats");

       // The slice size is computed from the shape of arg, so the shape has to
       // exist before it is queried.
       if (!tensorArg.hasRank()) {
         return emitOpError(
             "arg must be a ranked tensor when axisStats is specified");
       }

       // Reject an axis that would move the start iterator past shape.end().
       // An axis equal to the rank is still accepted: it yields the empty
       // product (N = 1), exactly as it did before this check was added.
       auto shape = tensorArg.getShape();
       if (static_cast<uint64_t>(*axis()) > shape.size()) {
         return emitOpError("axis is out of range for the rank of arg");
       }

       // Seed the fold with an int64_t so that the running product is not
       // narrowed to int on every step.
       int64_t argSliceSize = std::accumulate(
           std::next(shape.begin(), *axis()), shape.end(),
           static_cast<int64_t>(1), std::multiplies<int64_t>());

       auto axisStatsType = axisStats()->getType();
       if (!axisStatsType.getElementType().isa<FloatType>()) {
         return emitOpError("axisStats must have a floating point element type");
       }
       if (axisStatsType.getRank() != 2 ||
           axisStatsType.getDimSize(1) != 2 ||
           axisStatsType.getDimSize(0) != argSliceSize) {
         return emitOpError("axisStats must have shape [N,2] "
                            "where N = the slice size defined by the axis dim");
       }
     }
     return success();
   }];
 }

 def quant_CoupledRefOp : quant_Op<"coupled_ref", [SameOperandsAndResultType]> {
   let summary = [{
     Indicates that one point of the computation is coupled to another.
   }];

   let description = [{
     Ordinarily, relationships between ops for the purposes of determining
     compatible quantized types are explicit based on the use-def chain. However,
     in some situations, a use may be separated from its def by arbitrary
     external connections. In such a case, during analysis, all coupled_ref
     nodes in a module which share a coupledKey will be considered to be
     directly connected as if via an identity op for the purpose of type
     inference.
   }];

   // $arg is the coupled value; $coupledKey is the key shared by the
   // coupled_ref nodes that are to be treated as connected.
   let arguments = (ins
     quant_RealValueType:$arg,
     StrAttr:$coupledKey);
   let results = (outs quant_RealValueType);
 }

 #endif // DIALECT_QUANT_QUANT_OPS_
	//===- QuantOps.td - Quantization operation definition -----*- tablegen -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This is the operation definition file for Quantization.
	//
	//===----------------------------------------------------------------------===//

	#ifndef DIALECT_QUANT_QUANT_OPS_
	#define DIALECT_QUANT_QUANT_OPS_

	include "mlir/Dialect/Quant/QuantOpsBase.td"
	include "mlir/Interfaces/InferTypeOpInterface.td"
	include "mlir/Interfaces/SideEffectInterfaces.td"

	//===----------------------------------------------------------------------===//
	// Base classes
	//===----------------------------------------------------------------------===//

	// Base class for the ops defined in this file.
	//   mnemonic: forwarded to Op as the op mnemonic (e.g. "qcast").
	//   traits:   forwarded to Op as the trait list. Defaults to the empty list
	//             so that an op without traits does not have to pass `[]`.
	class quant_Op<string mnemonic, list<OpTrait> traits = []> :
	    Op<Quantization_Dialect, mnemonic, traits>;

	//===----------------------------------------------------------------------===//
	// Quantization casts
	//===----------------------------------------------------------------------===//
	// A QuantizeCast (qcast) represents a potential type shift from a quantizable
	// type to a quantized type.
	//
	// At runtime, a qcast will apply the transformation expressed by its
	// operand and result type. For flexibility during transformation, it is also
	// possible to have a qcast that performs no transformation (both its
	// operand and result type are quantizable).
	//
	// A qcast will typically originate from either:
	// a) An expressed or implied constraint in the source dialect which signals
	// that a certain level of quantization is possible or required.
	// b) An inference made by a quantization algorithm indicating that a
	// quantized representation may be acceptable.
	//
	// Especially early in transformation, it is common to have pairs of
	// qcast/dcast at points where a transition to a quantized type is
	// required. In addition, it is also common to have an identity qcast
	// (where the operand and result type are not quantized) at all points where
	// it is legal to use a quantized representation (but is not known to be
	// acceptable).
	def quant_QuantizeCastOp : quant_Op<"qcast", [NoSideEffect]> {
	  // One operand, constrained by quant_RealValueType.
	  let arguments = (ins
	    quant_RealValueType:$arg
	  );

	  // One unnamed result under the same constraint.
	  let results = (outs
	    quant_RealValueType
	  );
	}

	// A DequantizeCast op (dcast) represents the inverse of a qcast,
	// converting back from a quantized to quantizable (expressed) type.
	//
	// Like qcasts, a dcast is allowed to have both its operand and result
	// as non quantized types. This facilitates transformations and marks edges
	// where the computation must be carried out in the expressed type.
	//
	// Especially early in transformation, it is common to have dcasts on
	// all operands to ops that must operate with the expressed type (typically
	// math ops prior to lowering to target-specific, quantized kernels).
	def quant_DequantizeCastOp : quant_Op<"dcast", [NoSideEffect]> {
	  // One operand, constrained by quant_RealValueType.
	  let arguments = (ins
	    quant_RealValueType:$arg
	  );

	  // One unnamed result under the same constraint.
	  let results = (outs
	    quant_RealValueType
	  );
	}

	// A StorageCast (scast) represents a cast from or to a type based on the
	// storage type and a type based on a corresponding quantized type.
	//
	// This op exists to ensure type coherency between parts of the computation
	// which are operating directly on an underlying storage type and those which
	// operate on quantized values.
	//
	// Examples from storage to quantized type:
	// i8 -> !quant<"uniform[i8:f32]{1.0}">
	// tensor<4xi8> -> tensor<4x!quant<"uniform[i8:f32]{1.0}">>
	// vector<4xi8> -> vector<4x!quant<"uniform[i8:f32]{1.0}">>
	def quant_StorageCastOp : quant_Op<"scast", [NoSideEffect]> {
	  // This op provides a folder.
	  let hasFolder = 1;

	  // One operand and one unnamed result, both constrained by
	  // quant_RealOrStorageValueType.
	  let arguments = (ins
	    quant_RealOrStorageValueType:$arg
	  );
	  let results = (outs
	    quant_RealOrStorageValueType
	  );
	}

	// A QuantizeRegion (region) represents a quantization unit which wraps
	// high-precision ops with quantization specifications for all the inputs
	// and outputs. Some quantization specifications can be undetermined and
	// derived from other ports by the target specification of the kernel.
	def quant_QuantizeRegionOp
	    : quant_Op<"region", [NoSideEffect, IsolatedFromAbove,
	                          SingleBlockImplicitTerminator<"ReturnOp">]> {
	  let summary = [{
	The `region` operation wraps high-precision ops as a logical low-precision
	quantized kernel.
	}];

	  // Variadic operands followed by three attributes: the type arrays for the
	  // input and output specifications, and the name of the logical kernel.
	  let arguments = (ins
	    Variadic<AnyType>:$inputs,
	    TypeArrayAttr:$input_specs,
	    TypeArrayAttr:$output_specs,
	    StrAttr:$logical_kernel
	  );
	  let results = (outs
	    Variadic<AnyType>:$outputs
	  );

	  // The wrapped ops live in $body, which is constrained by SizedRegion<1>.
	  let regions = (region
	    SizedRegion<1>:$body
	  );

	  // Verification is delegated to verifyRegionOp(), which is defined outside
	  // of this file.
	  let verifier = [{ return verifyRegionOp(*this); }];
	}

	def quant_ReturnOp : quant_Op<"return", [Terminator]> {
	  let summary = [{
	The `return` operation terminates a quantize region and returns values.
	}];

	  // The returned values; each one must satisfy AnyTensor.
	  let arguments = (ins
	    Variadic<AnyTensor>:$results
	  );
	}

	//===----------------------------------------------------------------------===//
	// Training integration and instrumentation ops
	//===----------------------------------------------------------------------===//

	def quant_ConstFakeQuant
	    : quant_Op<"const_fake_quant", [SameOperandsAndResultType, NoSideEffect]> {
	  let summary = [{
	Simulates the effect of uniform quantization with const range.
	}];

	  let description = [{
	Given a const min, max, num_bits and narrow_range attribute, applies the
	same uniform quantization simulation as is done by the TensorFlow
	fake_quant_with_min_max_args op. See the fakeQuantAttrsToType() utility
	method and the quant-convert-simulated-quantization pass for further details.
	}];

	  // One f32 tensor operand followed by the attributes that configure the
	  // simulated quantization:
	  //   min, max     - the const range.
	  //   num_bits     - the bitwidth of the quantization; between 2 and 16,
	  //                  inclusive.
	  //   narrow_range - the quantization range starts from 1 if true and from 0
	  //                  otherwise; defaults to false.
	  //   is_signed    - the sign of the quantization; defaults to false.
	  let arguments = (ins
	    F32Tensor:$inputs,
	    F32Attr:$min,
	    F32Attr:$max,
	    I64Attr:$num_bits,
	    DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
	    DefaultValuedAttr<BoolAttr, "false">:$is_signed
	  );

	  let results = (outs F32Tensor:$outputs);
	}

	def quant_ConstFakeQuantPerAxis
	    : quant_Op<"const_fake_quant_per_axis",
	               [SameOperandsAndResultType, NoSideEffect]> {
	  let summary = [{
	Simulates the effect of per axis uniform quantization with const range.
	}];

	  let description = [{
	Given a const min, max, num_bits and narrow_range attribute, applies the
	same per axis uniform quantization simulation as is done by the TensorFlow
	fake_quant_with_min_max_vars_per_channel op. See the fakeQuantAttrsToType()
	utility method and the quant-convert-simulated-quantization pass for further
	details.
	}];

	  // One f32 tensor operand followed by the attributes that configure the
	  // simulated quantization:
	  //   min, max     - the const ranges, given as f32 arrays.
	  //   axis         - the quantized dimension of the inputs tensor.
	  //   num_bits     - the bitwidth of the quantization; between 2 and 16,
	  //                  inclusive.
	  //   narrow_range - the quantization range starts from 1 if true and from 0
	  //                  otherwise; defaults to false.
	  //   is_signed    - the sign of the quantization; defaults to false.
	  let arguments = (ins
	    F32Tensor:$inputs,
	    F32ArrayAttr:$min,
	    F32ArrayAttr:$max,
	    I64Attr:$axis,
	    I64Attr:$num_bits,
	    DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
	    DefaultValuedAttr<BoolAttr, "false">:$is_signed
	  );

	  let results = (outs F32Tensor:$outputs);
	}

	def quant_StatisticsRefOp : quant_Op<"stats_ref", [SameOperandsAndResultType]> {
	  let summary = "Indicates that statistics are resolved by reference.";

	  let description = [{
	    This op acts as an identity that, when encountered at runtime, should result
	    in statistics being collected about the value of its operand/result.
	    Such statistics will be stored with the provided key, allowing this node
	    to later be converted to a 'stats' op if statistics with that key have been
	    encountered.
	  }];

	  // $arg is the value being observed; $statsKey is the key under which its
	  // statistics are stored.
	  let arguments = (ins
	    quant_RealValueType:$arg,
	    StrAttr:$statsKey
	  );
	  let results = (outs quant_RealValueType);
	}

	def quant_StatisticsOp : quant_Op<"stats", [SameOperandsAndResultType]> {
	  let summary = "Identity op which associates statistics with the value.";

	  let description = [{
	    Associates statistics about the runtime ranges of values observed for
	    evaluations of this node.

	    Statistics about the entire type are reported in the `layerStats` attribute
	    and those for each axis, in the (optional) `axisStats` attribute. The
	    interpretation of each is determined by the last dimension of its shape.
	    Currently, only dim=2 is supported, which is interpreted as [min, max].

	    `layerStats` must be a rank 1 tensor: [2]
	    `axisStats` must be a rank 2 tensor: [N, 2], where N=the slice size
	      split by the `axis` dimension. For example:

	    ```
	    <?x?x3x2>, axis=3 => N=2
	    <?x?x3x2>, axis=2 => N=6
	    ```

	    When `axisStats` is present, `axis` must also be present, `arg` must be a
	    ranked tensor and `axis` must not exceed the rank of `arg`.
	  }];

	  let arguments = (ins
	    quant_RealValueType:$arg,
	    ElementsAttr:$layerStats,
	    OptionalAttr<ElementsAttr>:$axisStats,
	    OptionalAttr<I64Attr>:$axis);
	  let results = (outs quant_RealValueType);

	  let verifier = [{
	    auto tensorArg = arg().getType().dyn_cast<TensorType>();
	    if (!tensorArg) return emitOpError("arg needs to be tensor type.");

	    // Verify layerStats attribute.
	    {
	      auto layerStatsType = layerStats().getType();
	      if (!layerStatsType.getElementType().isa<FloatType>()) {
	        return emitOpError(
	            "layerStats must have a floating point element type");
	      }
	      if (layerStatsType.getRank() != 1 || layerStatsType.getDimSize(0) != 2) {
	        return emitOpError("layerStats must have shape [2]");
	      }
	    }
	    // Verify axisStats (optional) attribute.
	    if (axisStats()) {
	      if (!axis()) return emitOpError("axis must be specified for axisStats");

	      // The slice size is computed from the shape of arg, so the shape has to
	      // exist before it is queried.
	      if (!tensorArg.hasRank()) {
	        return emitOpError(
	            "arg must be a ranked tensor when axisStats is specified");
	      }

	      // Reject an axis that would move the start iterator past shape.end().
	      // An axis equal to the rank is still accepted: it yields the empty
	      // product (N = 1), exactly as it did before this check was added.
	      auto shape = tensorArg.getShape();
	      if (static_cast<uint64_t>(*axis()) > shape.size()) {
	        return emitOpError("axis is out of range for the rank of arg");
	      }

	      // Seed the fold with an int64_t so that the running product is not
	      // narrowed to int on every step.
	      int64_t argSliceSize = std::accumulate(
	          std::next(shape.begin(), *axis()), shape.end(),
	          static_cast<int64_t>(1), std::multiplies<int64_t>());

	      auto axisStatsType = axisStats()->getType();
	      if (!axisStatsType.getElementType().isa<FloatType>()) {
	        return emitOpError("axisStats must have a floating point element type");
	      }
	      if (axisStatsType.getRank() != 2 ||
	          axisStatsType.getDimSize(1) != 2 ||
	          axisStatsType.getDimSize(0) != argSliceSize) {
	        return emitOpError("axisStats must have shape [N,2] "
	                           "where N = the slice size defined by the axis dim");
	      }
	    }
	    return success();
	  }];
	}

	def quant_CoupledRefOp : quant_Op<"coupled_ref", [SameOperandsAndResultType]> {
	  let summary = [{
	    Indicates that one point of the computation is coupled to another.
	  }];

	  let description = [{
	    Ordinarily, relationships between ops for the purposes of determining
	    compatible quantized types are explicit based on the use-def chain. However,
	    in some situations, a use may be separated from its def by arbitrary
	    external connections. In such a case, during analysis, all coupled_ref
	    nodes in a module which share a coupledKey will be considered to be
	    directly connected as if via an identity op for the purpose of type
	    inference.
	  }];

	  // $arg is the coupled value; $coupledKey is the key shared by the
	  // coupled_ref nodes that are to be treated as connected.
	  let arguments = (ins
	    quant_RealValueType:$arg,
	    StrAttr:$coupledKey);
	  let results = (outs quant_RealValueType);
	}

	#endif // DIALECT_QUANT_QUANT_OPS_