lib/Bindings/Python/DialectQuant.cpp - llvm-project/mlir - Git at Google

 //===- DialectQuant.cpp - 'quant' dialect submodule -----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include <cstdint>
 #include <vector>

 #include "mlir-c/BuiltinAttributes.h"
 #include "mlir-c/Dialect/Quant.h"
 #include "mlir-c/IR.h"
 #include "mlir/Bindings/Python/Nanobind.h"
 #include "mlir/Bindings/Python/NanobindAdaptors.h"

 namespace nb = nanobind;
 using namespace llvm;
 using namespace mlir;
 using namespace mlir::python::nanobind_adaptors;

 /// Registers the Python classes of the 'quant' dialect on module `m`.
 ///
 /// Defines `QuantizedType` and, using its class as the fourth argument of
 /// `mlir_type_subclass`, the classes `AnyQuantizedType`,
 /// `UniformQuantizedType`, `UniformQuantizedPerAxisType`,
 /// `UniformQuantizedSubChannelType` and `CalibratedQuantizedType`. Every
 /// binding is a thin lambda that forwards to the matching `mlir*` C API
 /// function; the only logic added here is argument validation and the
 /// translation of null cast results into Python exceptions.
 static void populateDialectQuantSubmodule(const nb::module_ &m) {
   //===-------------------------------------------------------------------===//
   // QuantizedType
   //===-------------------------------------------------------------------===//

   auto quantizedType =
       mlir_type_subclass(m, "QuantizedType", mlirTypeIsAQuantizedType);
   quantizedType.def_staticmethod(
       "default_minimum_for_integer",
       [](bool isSigned, unsigned integralWidth) {
         return mlirQuantizedTypeGetDefaultMinimumForInteger(isSigned,
                                                             integralWidth);
       },
       "Default minimum value for the integer with the specified signedness and "
       "bit width.",
       nb::arg("is_signed"), nb::arg("integral_width"));
   quantizedType.def_staticmethod(
       "default_maximum_for_integer",
       [](bool isSigned, unsigned integralWidth) {
         return mlirQuantizedTypeGetDefaultMaximumForInteger(isSigned,
                                                             integralWidth);
       },
       "Default maximum value for the integer with the specified signedness and "
       "bit width.",
       nb::arg("is_signed"), nb::arg("integral_width"));
   quantizedType.def_property_readonly(
       "expressed_type",
       [](MlirType type) { return mlirQuantizedTypeGetExpressedType(type); },
       "Type expressed by this quantized type.");
   quantizedType.def_property_readonly(
       "flags", [](MlirType type) { return mlirQuantizedTypeGetFlags(type); },
       "Flags of this quantized type (named accessors should be preferred to "
       "this)");
   quantizedType.def_property_readonly(
       "is_signed",
       [](MlirType type) { return mlirQuantizedTypeIsSigned(type); },
       "Signedness of this quantized type.");
   quantizedType.def_property_readonly(
       "storage_type",
       [](MlirType type) { return mlirQuantizedTypeGetStorageType(type); },
       "Storage type backing this quantized type.");
   quantizedType.def_property_readonly(
       "storage_type_min",
       [](MlirType type) { return mlirQuantizedTypeGetStorageTypeMin(type); },
       "The minimum value held by the storage type of this quantized type.");
   quantizedType.def_property_readonly(
       "storage_type_max",
       [](MlirType type) { return mlirQuantizedTypeGetStorageTypeMax(type); },
       "The maximum value held by the storage type of this quantized type.");
   quantizedType.def_property_readonly(
       "storage_type_integral_width",
       [](MlirType type) {
         return mlirQuantizedTypeGetStorageTypeIntegralWidth(type);
       },
       "The bitwidth of the storage type of this quantized type.");
   quantizedType.def(
       "is_compatible_expressed_type",
       [](MlirType type, MlirType candidate) {
         return mlirQuantizedTypeIsCompatibleExpressedType(type, candidate);
       },
       "Checks whether the candidate type can be expressed by this quantized "
       "type.",
       nb::arg("candidate"));
   quantizedType.def_property_readonly(
       "quantized_element_type",
       [](MlirType type) {
         return mlirQuantizedTypeGetQuantizedElementType(type);
       },
       "Element type of this quantized type expressed as quantized type.");

   // The cast helpers below share one pattern: a null MlirType returned by the
   // C API is reported to Python as a TypeError instead of being returned.
   quantizedType.def(
       "cast_from_storage_type",
       [](MlirType type, MlirType candidate) {
         MlirType castResult =
             mlirQuantizedTypeCastFromStorageType(type, candidate);
         if (!mlirTypeIsNull(castResult))
           return castResult;
         throw nb::type_error("Invalid cast.");
       },
       "Casts from a type based on the storage type of this quantized type to a "
       "corresponding type based on the quantized type. Raises TypeError if the "
       "cast is not valid.",
       nb::arg("candidate"));
   quantizedType.def_staticmethod(
       "cast_to_storage_type",
       [](MlirType type) {
         MlirType castResult = mlirQuantizedTypeCastToStorageType(type);
         if (!mlirTypeIsNull(castResult))
           return castResult;
         throw nb::type_error("Invalid cast.");
       },
       "Casts from a type based on a quantized type to a corresponding type "
       "based on the storage type of this quantized type. Raises TypeError if "
       "the cast is not valid.",
       nb::arg("type"));
   quantizedType.def(
       "cast_from_expressed_type",
       [](MlirType type, MlirType candidate) {
         MlirType castResult =
             mlirQuantizedTypeCastFromExpressedType(type, candidate);
         if (!mlirTypeIsNull(castResult))
           return castResult;
         throw nb::type_error("Invalid cast.");
       },
       "Casts from a type based on the expressed type of this quantized type to "
       "a corresponding type based on the quantized type. Raises TypeError if "
       "the cast is not valid.",
       nb::arg("candidate"));
   quantizedType.def_staticmethod(
       "cast_to_expressed_type",
       [](MlirType type) {
         MlirType castResult = mlirQuantizedTypeCastToExpressedType(type);
         if (!mlirTypeIsNull(castResult))
           return castResult;
         throw nb::type_error("Invalid cast.");
       },
       "Casts from a type based on a quantized type to a corresponding type "
       "based on the expressed type of this quantized type. Raises TypeError if "
       "the cast is not valid.",
       nb::arg("type"));
   quantizedType.def(
       "cast_expressed_to_storage_type",
       [](MlirType type, MlirType candidate) {
         MlirType castResult =
             mlirQuantizedTypeCastExpressedToStorageType(type, candidate);
         if (!mlirTypeIsNull(castResult))
           return castResult;
         throw nb::type_error("Invalid cast.");
       },
       "Casts from a type based on the expressed type of this quantized type to "
       "a corresponding type based on the storage type. Raises TypeError if the "
       "cast is not valid.",
       nb::arg("candidate"));

   // Expose the signed flag value as a class attribute so that Python callers
   // can pass it as the `flags` argument of the `get` class methods.
   quantizedType.get_class().attr("FLAG_SIGNED") =
       mlirQuantizedTypeGetSignedFlag();

   //===-------------------------------------------------------------------===//
   // AnyQuantizedType
   //===-------------------------------------------------------------------===//

   auto anyQuantizedType =
       mlir_type_subclass(m, "AnyQuantizedType", mlirTypeIsAAnyQuantizedType,
                          quantizedType.get_class());
   anyQuantizedType.def_classmethod(
       "get",
       [](const nb::object &cls, unsigned flags, MlirType storageType,
          MlirType expressedType, int64_t storageTypeMin,
          int64_t storageTypeMax) {
         return cls(mlirAnyQuantizedTypeGet(flags, storageType, expressedType,
                                            storageTypeMin, storageTypeMax));
       },
       "Gets an instance of AnyQuantizedType in the same context as the "
       "provided storage type.",
       nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
       nb::arg("expressed_type"), nb::arg("storage_type_min"),
       nb::arg("storage_type_max"));

   //===-------------------------------------------------------------------===//
   // UniformQuantizedType
   //===-------------------------------------------------------------------===//

   auto uniformQuantizedType = mlir_type_subclass(
       m, "UniformQuantizedType", mlirTypeIsAUniformQuantizedType,
       quantizedType.get_class());
   uniformQuantizedType.def_classmethod(
       "get",
       [](const nb::object &cls, unsigned flags, MlirType storageType,
          MlirType expressedType, double scale, int64_t zeroPoint,
          int64_t storageTypeMin, int64_t storageTypeMax) {
         return cls(mlirUniformQuantizedTypeGet(flags, storageType,
                                                expressedType, scale, zeroPoint,
                                                storageTypeMin, storageTypeMax));
       },
       "Gets an instance of UniformQuantizedType in the same context as the "
       "provided storage type.",
       nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
       nb::arg("expressed_type"), nb::arg("scale"), nb::arg("zero_point"),
       nb::arg("storage_type_min"), nb::arg("storage_type_max"));
   uniformQuantizedType.def_property_readonly(
       "scale",
       [](MlirType type) { return mlirUniformQuantizedTypeGetScale(type); },
       "The scale designates the difference between the real values "
       "corresponding to consecutive quantized values differing by 1.");
   uniformQuantizedType.def_property_readonly(
       "zero_point",
       [](MlirType type) { return mlirUniformQuantizedTypeGetZeroPoint(type); },
       "The storage value corresponding to the real value 0 in the affine "
       "equation.");
   uniformQuantizedType.def_property_readonly(
       "is_fixed_point",
       [](MlirType type) { return mlirUniformQuantizedTypeIsFixedPoint(type); },
       "Fixed point values are real numbers divided by a scale.");

   //===-------------------------------------------------------------------===//
   // UniformQuantizedPerAxisType
   //===-------------------------------------------------------------------===//
   auto uniformQuantizedPerAxisType = mlir_type_subclass(
       m, "UniformQuantizedPerAxisType", mlirTypeIsAUniformQuantizedPerAxisType,
       quantizedType.get_class());
   uniformQuantizedPerAxisType.def_classmethod(
       "get",
       [](const nb::object &cls, unsigned flags, MlirType storageType,
          MlirType expressedType, std::vector<double> scales,
          std::vector<int64_t> zeroPoints, int32_t quantizedDimension,
          int64_t storageTypeMin, int64_t storageTypeMax) {
         // A single count is passed for both arrays, so they must agree.
         if (scales.size() != zeroPoints.size())
           throw nb::value_error(
               "Mismatching number of scales and zero points.");
         auto nDims = static_cast<intptr_t>(scales.size());
         return cls(mlirUniformQuantizedPerAxisTypeGet(
             flags, storageType, expressedType, nDims, scales.data(),
             zeroPoints.data(), quantizedDimension, storageTypeMin,
             storageTypeMax));
       },
       "Gets an instance of UniformQuantizedPerAxisType in the same context as "
       "the provided storage type.",
       nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
       nb::arg("expressed_type"), nb::arg("scales"), nb::arg("zero_points"),
       nb::arg("quantized_dimension"), nb::arg("storage_type_min"),
       nb::arg("storage_type_max"));
   uniformQuantizedPerAxisType.def_property_readonly(
       "scales",
       [](MlirType type) {
         intptr_t nDim = mlirUniformQuantizedPerAxisTypeGetNumDims(type);
         std::vector<double> scales;
         scales.reserve(nDim);
         for (intptr_t i = 0; i < nDim; ++i) {
           double scale = mlirUniformQuantizedPerAxisTypeGetScale(type, i);
           scales.push_back(scale);
         }
         return scales;
       },
       "The scales designate the difference between the real values "
       "corresponding to consecutive quantized values differing by 1. The ith "
       "scale corresponds to the ith slice in the quantized_dimension.");
   uniformQuantizedPerAxisType.def_property_readonly(
       "zero_points",
       [](MlirType type) {
         intptr_t nDim = mlirUniformQuantizedPerAxisTypeGetNumDims(type);
         std::vector<int64_t> zeroPoints;
         zeroPoints.reserve(nDim);
         for (intptr_t i = 0; i < nDim; ++i) {
           int64_t zeroPoint =
               mlirUniformQuantizedPerAxisTypeGetZeroPoint(type, i);
           zeroPoints.push_back(zeroPoint);
         }
         return zeroPoints;
       },
       "the storage values corresponding to the real value 0 in the affine "
       "equation. The ith zero point corresponds to the ith slice in the "
       "quantized_dimension.");
   uniformQuantizedPerAxisType.def_property_readonly(
       "quantized_dimension",
       [](MlirType type) {
         return mlirUniformQuantizedPerAxisTypeGetQuantizedDimension(type);
       },
       "Specifies the dimension of the shape that the scales and zero points "
       "correspond to.");
   uniformQuantizedPerAxisType.def_property_readonly(
       "is_fixed_point",
       [](MlirType type) {
         return mlirUniformQuantizedPerAxisTypeIsFixedPoint(type);
       },
       "Fixed point values are real numbers divided by a scale.");

   //===-------------------------------------------------------------------===//
   // UniformQuantizedSubChannelType
   //===-------------------------------------------------------------------===//
   auto uniformQuantizedSubChannelType = mlir_type_subclass(
       m, "UniformQuantizedSubChannelType",
       mlirTypeIsAUniformQuantizedSubChannelType, quantizedType.get_class());
   uniformQuantizedSubChannelType.def_classmethod(
       "get",
       [](const nb::object &cls, unsigned flags, MlirType storageType,
          MlirType expressedType, MlirAttribute scales, MlirAttribute zeroPoints,
          std::vector<int32_t> quantizedDimensions,
          std::vector<int64_t> blockSizes, int64_t storageTypeMin,
          int64_t storageTypeMax) {
         // Only one element count is handed to the C API for both arrays; if
         // the lists differ in length the shorter one would be read past its
         // end, so reject the call up front.
         if (quantizedDimensions.size() != blockSizes.size())
           throw nb::value_error(
               "Mismatching number of quantized dimensions and block sizes.");
         auto nBlockSizes = static_cast<intptr_t>(blockSizes.size());
         return cls(mlirUniformQuantizedSubChannelTypeGet(
             flags, storageType, expressedType, scales, zeroPoints, nBlockSizes,
             quantizedDimensions.data(), blockSizes.data(), storageTypeMin,
             storageTypeMax));
       },
       "Gets an instance of UniformQuantizedSubChannel in the same context as "
       "the provided storage type. Raises ValueError if the numbers of "
       "quantized dimensions and block sizes differ.",
       nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
       nb::arg("expressed_type"), nb::arg("scales"), nb::arg("zero_points"),
       nb::arg("quantized_dimensions"), nb::arg("block_sizes"),
       nb::arg("storage_type_min"), nb::arg("storage_type_max"));
   uniformQuantizedSubChannelType.def_property_readonly(
       "quantized_dimensions",
       [](MlirType type) {
         // The number of block sizes is used as the element count here too.
         intptr_t nDim =
             mlirUniformQuantizedSubChannelTypeGetNumBlockSizes(type);
         std::vector<int32_t> quantizedDimensions;
         quantizedDimensions.reserve(nDim);
         for (intptr_t i = 0; i < nDim; ++i) {
           quantizedDimensions.push_back(
               mlirUniformQuantizedSubChannelTypeGetQuantizedDimension(type, i));
         }
         return quantizedDimensions;
       },
       "Gets the quantized dimensions. Each element in the returned list "
       "represents an axis of the quantized data tensor that has a specified "
       "block size. The order of elements corresponds to the order of block "
       "sizes returned by 'block_sizes' method. It means that the data tensor "
       "is quantized along the i-th dimension in the returned list using the "
       "i-th block size from block_sizes method.");
   uniformQuantizedSubChannelType.def_property_readonly(
       "block_sizes",
       [](MlirType type) {
         intptr_t nDim =
             mlirUniformQuantizedSubChannelTypeGetNumBlockSizes(type);
         std::vector<int64_t> blockSizes;
         blockSizes.reserve(nDim);
         for (intptr_t i = 0; i < nDim; ++i) {
           blockSizes.push_back(
               mlirUniformQuantizedSubChannelTypeGetBlockSize(type, i));
         }
         return blockSizes;
       },
       "Gets the block sizes for the quantized dimensions. The i-th element in "
       "the returned list corresponds to the block size for the i-th dimension "
       "in the list returned by quantized_dimensions method.");
   uniformQuantizedSubChannelType.def_property_readonly(
       "scales",
       [](MlirType type) -> MlirAttribute {
         return mlirUniformQuantizedSubChannelTypeGetScales(type);
       },
       "The scales of the quantized type.");
   uniformQuantizedSubChannelType.def_property_readonly(
       "zero_points",
       [](MlirType type) -> MlirAttribute {
         return mlirUniformQuantizedSubChannelTypeGetZeroPoints(type);
       },
       "The zero points of the quantized type.");

   //===-------------------------------------------------------------------===//
   // CalibratedQuantizedType
   //===-------------------------------------------------------------------===//

   auto calibratedQuantizedType = mlir_type_subclass(
       m, "CalibratedQuantizedType", mlirTypeIsACalibratedQuantizedType,
       quantizedType.get_class());
   calibratedQuantizedType.def_classmethod(
       "get",
       [](const nb::object &cls, MlirType expressedType, double min,
          double max) {
         return cls(mlirCalibratedQuantizedTypeGet(expressedType, min, max));
       },
       "Gets an instance of CalibratedQuantizedType in the same context as the "
       "provided expressed type.",
       nb::arg("cls"), nb::arg("expressed_type"), nb::arg("min"),
       nb::arg("max"));
   calibratedQuantizedType.def_property_readonly(
       "min",
       [](MlirType type) { return mlirCalibratedQuantizedTypeGetMin(type); },
       "The min value of this calibrated quantized type.");
   calibratedQuantizedType.def_property_readonly(
       "max",
       [](MlirType type) { return mlirCalibratedQuantizedTypeGetMax(type); },
       "The max value of this calibrated quantized type.");
 }

 // Entry point of the `_mlirDialectsQuant` Python extension module.
 NB_MODULE(_mlirDialectsQuant, m) {
   // Register every quant dialect type class on the module.
   populateDialectQuantSubmodule(m);

   // Module-level docstring.
   m.doc() = "MLIR Quantization dialect";
 }
	//===- DialectQuant.cpp - 'quant' dialect submodule -----------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include <cstdint>
	#include <vector>

	#include "mlir-c/BuiltinAttributes.h"
	#include "mlir-c/Dialect/Quant.h"
	#include "mlir-c/IR.h"
	#include "mlir/Bindings/Python/Nanobind.h"
	#include "mlir/Bindings/Python/NanobindAdaptors.h"

	namespace nb = nanobind;
	using namespace llvm;
	using namespace mlir;
	using namespace mlir::python::nanobind_adaptors;

	/// Registers the Python classes of the 'quant' dialect on module `m`.
	///
	/// Defines `QuantizedType` and, using its class as the fourth argument of
	/// `mlir_type_subclass`, the classes `AnyQuantizedType`,
	/// `UniformQuantizedType`, `UniformQuantizedPerAxisType`,
	/// `UniformQuantizedSubChannelType` and `CalibratedQuantizedType`. Every
	/// binding is a thin lambda that forwards to the matching `mlir*` C API
	/// function; the only logic added here is argument validation and the
	/// translation of null cast results into Python exceptions.
	static void populateDialectQuantSubmodule(const nb::module_ &m) {
	  //===-------------------------------------------------------------------===//
	  // QuantizedType
	  //===-------------------------------------------------------------------===//

	  auto quantizedType =
	      mlir_type_subclass(m, "QuantizedType", mlirTypeIsAQuantizedType);
	  quantizedType.def_staticmethod(
	      "default_minimum_for_integer",
	      [](bool isSigned, unsigned integralWidth) {
	        return mlirQuantizedTypeGetDefaultMinimumForInteger(isSigned,
	                                                            integralWidth);
	      },
	      "Default minimum value for the integer with the specified signedness and "
	      "bit width.",
	      nb::arg("is_signed"), nb::arg("integral_width"));
	  quantizedType.def_staticmethod(
	      "default_maximum_for_integer",
	      [](bool isSigned, unsigned integralWidth) {
	        return mlirQuantizedTypeGetDefaultMaximumForInteger(isSigned,
	                                                            integralWidth);
	      },
	      "Default maximum value for the integer with the specified signedness and "
	      "bit width.",
	      nb::arg("is_signed"), nb::arg("integral_width"));
	  quantizedType.def_property_readonly(
	      "expressed_type",
	      [](MlirType type) { return mlirQuantizedTypeGetExpressedType(type); },
	      "Type expressed by this quantized type.");
	  quantizedType.def_property_readonly(
	      "flags", [](MlirType type) { return mlirQuantizedTypeGetFlags(type); },
	      "Flags of this quantized type (named accessors should be preferred to "
	      "this)");
	  quantizedType.def_property_readonly(
	      "is_signed",
	      [](MlirType type) { return mlirQuantizedTypeIsSigned(type); },
	      "Signedness of this quantized type.");
	  quantizedType.def_property_readonly(
	      "storage_type",
	      [](MlirType type) { return mlirQuantizedTypeGetStorageType(type); },
	      "Storage type backing this quantized type.");
	  quantizedType.def_property_readonly(
	      "storage_type_min",
	      [](MlirType type) { return mlirQuantizedTypeGetStorageTypeMin(type); },
	      "The minimum value held by the storage type of this quantized type.");
	  quantizedType.def_property_readonly(
	      "storage_type_max",
	      [](MlirType type) { return mlirQuantizedTypeGetStorageTypeMax(type); },
	      "The maximum value held by the storage type of this quantized type.");
	  quantizedType.def_property_readonly(
	      "storage_type_integral_width",
	      [](MlirType type) {
	        return mlirQuantizedTypeGetStorageTypeIntegralWidth(type);
	      },
	      "The bitwidth of the storage type of this quantized type.");
	  quantizedType.def(
	      "is_compatible_expressed_type",
	      [](MlirType type, MlirType candidate) {
	        return mlirQuantizedTypeIsCompatibleExpressedType(type, candidate);
	      },
	      "Checks whether the candidate type can be expressed by this quantized "
	      "type.",
	      nb::arg("candidate"));
	  quantizedType.def_property_readonly(
	      "quantized_element_type",
	      [](MlirType type) {
	        return mlirQuantizedTypeGetQuantizedElementType(type);
	      },
	      "Element type of this quantized type expressed as quantized type.");

	  // The cast helpers below share one pattern: a null MlirType returned by the
	  // C API is reported to Python as a TypeError instead of being returned.
	  quantizedType.def(
	      "cast_from_storage_type",
	      [](MlirType type, MlirType candidate) {
	        MlirType castResult =
	            mlirQuantizedTypeCastFromStorageType(type, candidate);
	        if (!mlirTypeIsNull(castResult))
	          return castResult;
	        throw nb::type_error("Invalid cast.");
	      },
	      "Casts from a type based on the storage type of this quantized type to a "
	      "corresponding type based on the quantized type. Raises TypeError if the "
	      "cast is not valid.",
	      nb::arg("candidate"));
	  quantizedType.def_staticmethod(
	      "cast_to_storage_type",
	      [](MlirType type) {
	        MlirType castResult = mlirQuantizedTypeCastToStorageType(type);
	        if (!mlirTypeIsNull(castResult))
	          return castResult;
	        throw nb::type_error("Invalid cast.");
	      },
	      "Casts from a type based on a quantized type to a corresponding type "
	      "based on the storage type of this quantized type. Raises TypeError if "
	      "the cast is not valid.",
	      nb::arg("type"));
	  quantizedType.def(
	      "cast_from_expressed_type",
	      [](MlirType type, MlirType candidate) {
	        MlirType castResult =
	            mlirQuantizedTypeCastFromExpressedType(type, candidate);
	        if (!mlirTypeIsNull(castResult))
	          return castResult;
	        throw nb::type_error("Invalid cast.");
	      },
	      "Casts from a type based on the expressed type of this quantized type to "
	      "a corresponding type based on the quantized type. Raises TypeError if "
	      "the cast is not valid.",
	      nb::arg("candidate"));
	  quantizedType.def_staticmethod(
	      "cast_to_expressed_type",
	      [](MlirType type) {
	        MlirType castResult = mlirQuantizedTypeCastToExpressedType(type);
	        if (!mlirTypeIsNull(castResult))
	          return castResult;
	        throw nb::type_error("Invalid cast.");
	      },
	      "Casts from a type based on a quantized type to a corresponding type "
	      "based on the expressed type of this quantized type. Raises TypeError if "
	      "the cast is not valid.",
	      nb::arg("type"));
	  quantizedType.def(
	      "cast_expressed_to_storage_type",
	      [](MlirType type, MlirType candidate) {
	        MlirType castResult =
	            mlirQuantizedTypeCastExpressedToStorageType(type, candidate);
	        if (!mlirTypeIsNull(castResult))
	          return castResult;
	        throw nb::type_error("Invalid cast.");
	      },
	      "Casts from a type based on the expressed type of this quantized type to "
	      "a corresponding type based on the storage type. Raises TypeError if the "
	      "cast is not valid.",
	      nb::arg("candidate"));

	  // Expose the signed flag value as a class attribute so that Python callers
	  // can pass it as the `flags` argument of the `get` class methods.
	  quantizedType.get_class().attr("FLAG_SIGNED") =
	      mlirQuantizedTypeGetSignedFlag();

	  //===-------------------------------------------------------------------===//
	  // AnyQuantizedType
	  //===-------------------------------------------------------------------===//

	  auto anyQuantizedType =
	      mlir_type_subclass(m, "AnyQuantizedType", mlirTypeIsAAnyQuantizedType,
	                         quantizedType.get_class());
	  anyQuantizedType.def_classmethod(
	      "get",
	      [](const nb::object &cls, unsigned flags, MlirType storageType,
	         MlirType expressedType, int64_t storageTypeMin,
	         int64_t storageTypeMax) {
	        return cls(mlirAnyQuantizedTypeGet(flags, storageType, expressedType,
	                                           storageTypeMin, storageTypeMax));
	      },
	      "Gets an instance of AnyQuantizedType in the same context as the "
	      "provided storage type.",
	      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
	      nb::arg("expressed_type"), nb::arg("storage_type_min"),
	      nb::arg("storage_type_max"));

	  //===-------------------------------------------------------------------===//
	  // UniformQuantizedType
	  //===-------------------------------------------------------------------===//

	  auto uniformQuantizedType = mlir_type_subclass(
	      m, "UniformQuantizedType", mlirTypeIsAUniformQuantizedType,
	      quantizedType.get_class());
	  uniformQuantizedType.def_classmethod(
	      "get",
	      [](const nb::object &cls, unsigned flags, MlirType storageType,
	         MlirType expressedType, double scale, int64_t zeroPoint,
	         int64_t storageTypeMin, int64_t storageTypeMax) {
	        return cls(mlirUniformQuantizedTypeGet(flags, storageType,
	                                               expressedType, scale, zeroPoint,
	                                               storageTypeMin, storageTypeMax));
	      },
	      "Gets an instance of UniformQuantizedType in the same context as the "
	      "provided storage type.",
	      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
	      nb::arg("expressed_type"), nb::arg("scale"), nb::arg("zero_point"),
	      nb::arg("storage_type_min"), nb::arg("storage_type_max"));
	  uniformQuantizedType.def_property_readonly(
	      "scale",
	      [](MlirType type) { return mlirUniformQuantizedTypeGetScale(type); },
	      "The scale designates the difference between the real values "
	      "corresponding to consecutive quantized values differing by 1.");
	  uniformQuantizedType.def_property_readonly(
	      "zero_point",
	      [](MlirType type) { return mlirUniformQuantizedTypeGetZeroPoint(type); },
	      "The storage value corresponding to the real value 0 in the affine "
	      "equation.");
	  uniformQuantizedType.def_property_readonly(
	      "is_fixed_point",
	      [](MlirType type) { return mlirUniformQuantizedTypeIsFixedPoint(type); },
	      "Fixed point values are real numbers divided by a scale.");

	  //===-------------------------------------------------------------------===//
	  // UniformQuantizedPerAxisType
	  //===-------------------------------------------------------------------===//
	  auto uniformQuantizedPerAxisType = mlir_type_subclass(
	      m, "UniformQuantizedPerAxisType", mlirTypeIsAUniformQuantizedPerAxisType,
	      quantizedType.get_class());
	  uniformQuantizedPerAxisType.def_classmethod(
	      "get",
	      [](const nb::object &cls, unsigned flags, MlirType storageType,
	         MlirType expressedType, std::vector<double> scales,
	         std::vector<int64_t> zeroPoints, int32_t quantizedDimension,
	         int64_t storageTypeMin, int64_t storageTypeMax) {
	        // A single count is passed for both arrays, so they must agree.
	        if (scales.size() != zeroPoints.size())
	          throw nb::value_error(
	              "Mismatching number of scales and zero points.");
	        auto nDims = static_cast<intptr_t>(scales.size());
	        return cls(mlirUniformQuantizedPerAxisTypeGet(
	            flags, storageType, expressedType, nDims, scales.data(),
	            zeroPoints.data(), quantizedDimension, storageTypeMin,
	            storageTypeMax));
	      },
	      "Gets an instance of UniformQuantizedPerAxisType in the same context as "
	      "the provided storage type.",
	      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
	      nb::arg("expressed_type"), nb::arg("scales"), nb::arg("zero_points"),
	      nb::arg("quantized_dimension"), nb::arg("storage_type_min"),
	      nb::arg("storage_type_max"));
	  uniformQuantizedPerAxisType.def_property_readonly(
	      "scales",
	      [](MlirType type) {
	        intptr_t nDim = mlirUniformQuantizedPerAxisTypeGetNumDims(type);
	        std::vector<double> scales;
	        scales.reserve(nDim);
	        for (intptr_t i = 0; i < nDim; ++i) {
	          double scale = mlirUniformQuantizedPerAxisTypeGetScale(type, i);
	          scales.push_back(scale);
	        }
	        return scales;
	      },
	      "The scales designate the difference between the real values "
	      "corresponding to consecutive quantized values differing by 1. The ith "
	      "scale corresponds to the ith slice in the quantized_dimension.");
	  uniformQuantizedPerAxisType.def_property_readonly(
	      "zero_points",
	      [](MlirType type) {
	        intptr_t nDim = mlirUniformQuantizedPerAxisTypeGetNumDims(type);
	        std::vector<int64_t> zeroPoints;
	        zeroPoints.reserve(nDim);
	        for (intptr_t i = 0; i < nDim; ++i) {
	          int64_t zeroPoint =
	              mlirUniformQuantizedPerAxisTypeGetZeroPoint(type, i);
	          zeroPoints.push_back(zeroPoint);
	        }
	        return zeroPoints;
	      },
	      "the storage values corresponding to the real value 0 in the affine "
	      "equation. The ith zero point corresponds to the ith slice in the "
	      "quantized_dimension.");
	  uniformQuantizedPerAxisType.def_property_readonly(
	      "quantized_dimension",
	      [](MlirType type) {
	        return mlirUniformQuantizedPerAxisTypeGetQuantizedDimension(type);
	      },
	      "Specifies the dimension of the shape that the scales and zero points "
	      "correspond to.");
	  uniformQuantizedPerAxisType.def_property_readonly(
	      "is_fixed_point",
	      [](MlirType type) {
	        return mlirUniformQuantizedPerAxisTypeIsFixedPoint(type);
	      },
	      "Fixed point values are real numbers divided by a scale.");

	  //===-------------------------------------------------------------------===//
	  // UniformQuantizedSubChannelType
	  //===-------------------------------------------------------------------===//
	  auto uniformQuantizedSubChannelType = mlir_type_subclass(
	      m, "UniformQuantizedSubChannelType",
	      mlirTypeIsAUniformQuantizedSubChannelType, quantizedType.get_class());
	  uniformQuantizedSubChannelType.def_classmethod(
	      "get",
	      [](const nb::object &cls, unsigned flags, MlirType storageType,
	         MlirType expressedType, MlirAttribute scales, MlirAttribute zeroPoints,
	         std::vector<int32_t> quantizedDimensions,
	         std::vector<int64_t> blockSizes, int64_t storageTypeMin,
	         int64_t storageTypeMax) {
	        // Only one element count is handed to the C API for both arrays; if
	        // the lists differ in length the shorter one would be read past its
	        // end, so reject the call up front.
	        if (quantizedDimensions.size() != blockSizes.size())
	          throw nb::value_error(
	              "Mismatching number of quantized dimensions and block sizes.");
	        auto nBlockSizes = static_cast<intptr_t>(blockSizes.size());
	        return cls(mlirUniformQuantizedSubChannelTypeGet(
	            flags, storageType, expressedType, scales, zeroPoints, nBlockSizes,
	            quantizedDimensions.data(), blockSizes.data(), storageTypeMin,
	            storageTypeMax));
	      },
	      "Gets an instance of UniformQuantizedSubChannel in the same context as "
	      "the provided storage type. Raises ValueError if the numbers of "
	      "quantized dimensions and block sizes differ.",
	      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
	      nb::arg("expressed_type"), nb::arg("scales"), nb::arg("zero_points"),
	      nb::arg("quantized_dimensions"), nb::arg("block_sizes"),
	      nb::arg("storage_type_min"), nb::arg("storage_type_max"));
	  uniformQuantizedSubChannelType.def_property_readonly(
	      "quantized_dimensions",
	      [](MlirType type) {
	        // The number of block sizes is used as the element count here too.
	        intptr_t nDim =
	            mlirUniformQuantizedSubChannelTypeGetNumBlockSizes(type);
	        std::vector<int32_t> quantizedDimensions;
	        quantizedDimensions.reserve(nDim);
	        for (intptr_t i = 0; i < nDim; ++i) {
	          quantizedDimensions.push_back(
	              mlirUniformQuantizedSubChannelTypeGetQuantizedDimension(type, i));
	        }
	        return quantizedDimensions;
	      },
	      "Gets the quantized dimensions. Each element in the returned list "
	      "represents an axis of the quantized data tensor that has a specified "
	      "block size. The order of elements corresponds to the order of block "
	      "sizes returned by 'block_sizes' method. It means that the data tensor "
	      "is quantized along the i-th dimension in the returned list using the "
	      "i-th block size from block_sizes method.");
	  uniformQuantizedSubChannelType.def_property_readonly(
	      "block_sizes",
	      [](MlirType type) {
	        intptr_t nDim =
	            mlirUniformQuantizedSubChannelTypeGetNumBlockSizes(type);
	        std::vector<int64_t> blockSizes;
	        blockSizes.reserve(nDim);
	        for (intptr_t i = 0; i < nDim; ++i) {
	          blockSizes.push_back(
	              mlirUniformQuantizedSubChannelTypeGetBlockSize(type, i));
	        }
	        return blockSizes;
	      },
	      "Gets the block sizes for the quantized dimensions. The i-th element in "
	      "the returned list corresponds to the block size for the i-th dimension "
	      "in the list returned by quantized_dimensions method.");
	  uniformQuantizedSubChannelType.def_property_readonly(
	      "scales",
	      [](MlirType type) -> MlirAttribute {
	        return mlirUniformQuantizedSubChannelTypeGetScales(type);
	      },
	      "The scales of the quantized type.");
	  uniformQuantizedSubChannelType.def_property_readonly(
	      "zero_points",
	      [](MlirType type) -> MlirAttribute {
	        return mlirUniformQuantizedSubChannelTypeGetZeroPoints(type);
	      },
	      "The zero points of the quantized type.");

	  //===-------------------------------------------------------------------===//
	  // CalibratedQuantizedType
	  //===-------------------------------------------------------------------===//

	  auto calibratedQuantizedType = mlir_type_subclass(
	      m, "CalibratedQuantizedType", mlirTypeIsACalibratedQuantizedType,
	      quantizedType.get_class());
	  calibratedQuantizedType.def_classmethod(
	      "get",
	      [](const nb::object &cls, MlirType expressedType, double min,
	         double max) {
	        return cls(mlirCalibratedQuantizedTypeGet(expressedType, min, max));
	      },
	      "Gets an instance of CalibratedQuantizedType in the same context as the "
	      "provided expressed type.",
	      nb::arg("cls"), nb::arg("expressed_type"), nb::arg("min"),
	      nb::arg("max"));
	  calibratedQuantizedType.def_property_readonly(
	      "min",
	      [](MlirType type) { return mlirCalibratedQuantizedTypeGetMin(type); },
	      "The min value of this calibrated quantized type.");
	  calibratedQuantizedType.def_property_readonly(
	      "max",
	      [](MlirType type) { return mlirCalibratedQuantizedTypeGetMax(type); },
	      "The max value of this calibrated quantized type.");
	}

	// Entry point of the `_mlirDialectsQuant` Python extension module.
	NB_MODULE(_mlirDialectsQuant, m) {
	  // Register every quant dialect type class on the module.
	  populateDialectQuantSubmodule(m);

	  // Module-level docstring.
	  m.doc() = "MLIR Quantization dialect";
	}