mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp - llvm-project - Git at Google

 //===- FakeQuantSupport.cpp - Support utilities for FakeQuant ops ---------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/Quant/FakeQuantSupport.h"
 #include "mlir/Dialect/Quant/QuantTypes.h"

 using namespace mlir;
 using namespace mlir::quant;

 static bool getDefaultStorageParams(unsigned numBits, bool narrowRange,
                                     bool isSigned, MLIRContext *ctx,
                                     Type &storageType, int64_t &qmin,
                                     int64_t &qmax) {
   // Hard-coded type mapping from TFLite.
   if (numBits <= 8) {
     storageType = IntegerType::get(ctx, 8);
     if (isSigned) {
       qmin = -128;
       qmax = 127;
     } else {
       qmin = 0;
       qmax = 255;
     }
   } else if (numBits <= 16) {
     storageType = IntegerType::get(ctx, 16);
     if (isSigned) {
       qmin = -32768;
       qmax = 32767;
     } else {
       qmin = 0;
       qmax = 65535;
     }
   } else if (numBits <= 32) {
     storageType = IntegerType::get(ctx, 32);
     if (isSigned) {
       qmin = std::numeric_limits<int32_t>::min();
       qmax = std::numeric_limits<int32_t>::max();
     } else {
       qmin = std::numeric_limits<uint32_t>::min();
       qmax = std::numeric_limits<uint32_t>::max();
     }
   } else {
     return true;
   }

   // Handle narrowRange.
   if (narrowRange) {
     qmin += 1;
   }
   return false;
 }

 // This is a specific implementation of nudging:
 // If 0.0 < rmin < rmax or rmin < rmax < 0.0, the range will be shifted
 // to include 0.0, but the range width size (rmax-rmin) isn't changed. The zero
 // point is derived from the shifted range, and the scale isn't changed. As
 // a consequence some values, which are supposed in the original [rmin, rmax]
 // range will be outside the shifted range and be clamped during quantization.
 // TODO: we should nudge the scale as well, but that requires the
 // fake quant op used in the training to use the nudged scale as well.
 static void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin,
                                        double rmax, double &scale,
                                        int64_t &nudgedZeroPoint) {
   // Determine the scale.
   const double qminDouble = qmin;
   const double qmaxDouble = qmax;
   scale = (rmax - rmin) / (qmaxDouble - qminDouble);

   // Zero point computation.
   // In float, solve the affine equation for any known pair
   // (real value, corresponding quantized value), of which, two such pairs
   // are known: (rmin, qmin), (rmax, qmax).
   // The arithmetic error on the zero point computed from either pair will be
   // roughly machine_epsilon * (sum of absolute values of terms).
   // Use the variant that adds the smaller error.
   const double zeroPointFromMin = qminDouble - rmin / scale;
   const double zeroPointFromMinError =
       std::abs(qminDouble) + std::abs(rmin / scale);
   const double zeroPointFromMax = qmaxDouble - rmax / scale;
   const double zeroPointFromMaxError =
       std::abs(qmaxDouble) + std::abs(rmax / scale);

   const double zeroPointDouble = (zeroPointFromMinError < zeroPointFromMaxError)
                                      ? zeroPointFromMin
                                      : zeroPointFromMax;

   // Now nudge the zero point to be an integer.
   nudgedZeroPoint = 0;
   if (zeroPointDouble < qminDouble) {
     nudgedZeroPoint = qmin;
   } else if (zeroPointDouble > qmaxDouble) {
     nudgedZeroPoint = qmax;
   } else {
     nudgedZeroPoint = round(zeroPointDouble);
   }

   // By construction, the nudged zero point should always be in range.
   assert(nudgedZeroPoint >= qmin);
   assert(nudgedZeroPoint <= qmax);
 }

 UniformQuantizedType
 mlir::quant::fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin,
                                   double rmax, bool narrowRange,
                                   Type expressedType, bool isSigned) {
   MLIRContext *ctx = expressedType.getContext();
   unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
   Type storageType;
   int64_t qmin;
   int64_t qmax;
   if (getDefaultStorageParams(numBits, narrowRange, isSigned, ctx, storageType,
                               qmin, qmax)) {
     return (emitError(loc, "unsupported FakeQuant number of bits: ") << numBits,
             nullptr);
   }

   // Special case where min/max is close enough. The tensor contents are all
   // 0.0s, so the scale is set to 1.0 and the tensor can be quantized to zero
   // points and dequantized to 0.0.
   if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
     return UniformQuantizedType::getChecked(
         loc, flags, storageType, expressedType, 1.0, qmin, qmin, qmax);
   }

   double scale;
   int64_t nudgedZeroPoint;
   getNudgedScaleAndZeroPoint(qmin, qmax, rmin, rmax, scale, nudgedZeroPoint);

   return UniformQuantizedType::getChecked(loc, flags, storageType,
                                           expressedType, scale, nudgedZeroPoint,
                                           qmin, qmax);
 }

 UniformQuantizedPerAxisType mlir::quant::fakeQuantAttrsToType(
     Location loc, unsigned numBits, int32_t quantizedDimension,
     ArrayRef<double> rmins, ArrayRef<double> rmaxs, bool narrowRange,
     Type expressedType, bool isSigned) {
   size_t axis_size = rmins.size();
   if (axis_size != rmaxs.size()) {
     return (emitError(loc, "mismatched per-axis min and max size: ")
                 << axis_size << " vs. " << rmaxs.size(),
             nullptr);
   }

   MLIRContext *ctx = expressedType.getContext();
   Type storageType;
   int64_t qmin;
   int64_t qmax;
   if (getDefaultStorageParams(numBits, narrowRange, isSigned, ctx, storageType,
                               qmin, qmax)) {
     return (emitError(loc, "unsupported FakeQuant number of bits: ") << numBits,
             nullptr);
   }

   SmallVector<double, 4> scales;
   SmallVector<int64_t, 4> zeroPoints;
   scales.reserve(axis_size);
   zeroPoints.reserve(axis_size);
   for (size_t axis = 0; axis != axis_size; ++axis) {
     double rmin = rmins[axis];
     double rmax = rmaxs[axis];
     if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
       scales.push_back(1.0);
       zeroPoints.push_back(qmin);
       continue;
     }

     double scale;
     int64_t nudgedZeroPoint;
     getNudgedScaleAndZeroPoint(qmin, qmax, rmin, rmax, scale, nudgedZeroPoint);
     scales.push_back(scale);
     zeroPoints.push_back(nudgedZeroPoint);
   }

   unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
   return UniformQuantizedPerAxisType::getChecked(
       loc, flags, storageType, expressedType, scales, zeroPoints,
       quantizedDimension, qmin, qmax);
 }
	//===- FakeQuantSupport.cpp - Support utilities for FakeQuant ops ---------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Dialect/Quant/FakeQuantSupport.h"
	#include "mlir/Dialect/Quant/QuantTypes.h"

	using namespace mlir;
	using namespace mlir::quant;

	static bool getDefaultStorageParams(unsigned numBits, bool narrowRange,
	bool isSigned, MLIRContext *ctx,
	Type &storageType, int64_t &qmin,
	int64_t &qmax) {
	// Hard-coded type mapping from TFLite.
	if (numBits <= 8) {
	storageType = IntegerType::get(ctx, 8);
	if (isSigned) {
	qmin = -128;
	qmax = 127;
	} else {
	qmin = 0;
	qmax = 255;
	}
	} else if (numBits <= 16) {
	storageType = IntegerType::get(ctx, 16);
	if (isSigned) {
	qmin = -32768;
	qmax = 32767;
	} else {
	qmin = 0;
	qmax = 65535;
	}
	} else if (numBits <= 32) {
	storageType = IntegerType::get(ctx, 32);
	if (isSigned) {
	qmin = std::numeric_limits<int32_t>::min();
	qmax = std::numeric_limits<int32_t>::max();
	} else {
	qmin = std::numeric_limits<uint32_t>::min();
	qmax = std::numeric_limits<uint32_t>::max();
	}
	} else {
	return true;
	}

	// Handle narrowRange.
	if (narrowRange) {
	qmin += 1;
	}
	return false;
	}

	// This is a specific implementation of nudging:
	// If 0.0 < rmin < rmax or rmin < rmax < 0.0, the range will be shifted
	// to include 0.0, but the range width size (rmax-rmin) isn't changed. The zero
	// point is derived from the shifted range, and the scale isn't changed. As
	// a consequence some values, which are supposed in the original [rmin, rmax]
	// range will be outside the shifted range and be clamped during quantization.
	// TODO: we should nudge the scale as well, but that requires the
	// fake quant op used in the training to use the nudged scale as well.
	static void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin,
	double rmax, double &scale,
	int64_t &nudgedZeroPoint) {
	// Determine the scale.
	const double qminDouble = qmin;
	const double qmaxDouble = qmax;
	scale = (rmax - rmin) / (qmaxDouble - qminDouble);

	// Zero point computation.
	// In float, solve the affine equation for any known pair
	// (real value, corresponding quantized value), of which, two such pairs
	// are known: (rmin, qmin), (rmax, qmax).
	// The arithmetic error on the zero point computed from either pair will be
	// roughly machine_epsilon * (sum of absolute values of terms).
	// Use the variant that adds the smaller error.
	const double zeroPointFromMin = qminDouble - rmin / scale;
	const double zeroPointFromMinError =
	std::abs(qminDouble) + std::abs(rmin / scale);
	const double zeroPointFromMax = qmaxDouble - rmax / scale;
	const double zeroPointFromMaxError =
	std::abs(qmaxDouble) + std::abs(rmax / scale);

	const double zeroPointDouble = (zeroPointFromMinError < zeroPointFromMaxError)
	? zeroPointFromMin
	: zeroPointFromMax;

	// Now nudge the zero point to be an integer.
	nudgedZeroPoint = 0;
	if (zeroPointDouble < qminDouble) {
	nudgedZeroPoint = qmin;
	} else if (zeroPointDouble > qmaxDouble) {
	nudgedZeroPoint = qmax;
	} else {
	nudgedZeroPoint = round(zeroPointDouble);
	}

	// By construction, the nudged zero point should always be in range.
	assert(nudgedZeroPoint >= qmin);
	assert(nudgedZeroPoint <= qmax);
	}

	UniformQuantizedType
	mlir::quant::fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin,
	double rmax, bool narrowRange,
	Type expressedType, bool isSigned) {
	MLIRContext *ctx = expressedType.getContext();
	unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
	Type storageType;
	int64_t qmin;
	int64_t qmax;
	if (getDefaultStorageParams(numBits, narrowRange, isSigned, ctx, storageType,
	qmin, qmax)) {
	return (emitError(loc, "unsupported FakeQuant number of bits: ") << numBits,
	nullptr);
	}

	// Special case where min/max is close enough. The tensor contents are all
	// 0.0s, so the scale is set to 1.0 and the tensor can be quantized to zero
	// points and dequantized to 0.0.
	if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
	return UniformQuantizedType::getChecked(
	loc, flags, storageType, expressedType, 1.0, qmin, qmin, qmax);
	}

	double scale;
	int64_t nudgedZeroPoint;
	getNudgedScaleAndZeroPoint(qmin, qmax, rmin, rmax, scale, nudgedZeroPoint);

	return UniformQuantizedType::getChecked(loc, flags, storageType,
	expressedType, scale, nudgedZeroPoint,
	qmin, qmax);
	}

	UniformQuantizedPerAxisType mlir::quant::fakeQuantAttrsToType(
	Location loc, unsigned numBits, int32_t quantizedDimension,
	ArrayRef<double> rmins, ArrayRef<double> rmaxs, bool narrowRange,
	Type expressedType, bool isSigned) {
	size_t axis_size = rmins.size();
	if (axis_size != rmaxs.size()) {
	return (emitError(loc, "mismatched per-axis min and max size: ")
	<< axis_size << " vs. " << rmaxs.size(),
	nullptr);
	}

	MLIRContext *ctx = expressedType.getContext();
	Type storageType;
	int64_t qmin;
	int64_t qmax;
	if (getDefaultStorageParams(numBits, narrowRange, isSigned, ctx, storageType,
	qmin, qmax)) {
	return (emitError(loc, "unsupported FakeQuant number of bits: ") << numBits,
	nullptr);
	}

	SmallVector<double, 4> scales;
	SmallVector<int64_t, 4> zeroPoints;
	scales.reserve(axis_size);
	zeroPoints.reserve(axis_size);
	for (size_t axis = 0; axis != axis_size; ++axis) {
	double rmin = rmins[axis];
	double rmax = rmaxs[axis];
	if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
	scales.push_back(1.0);
	zeroPoints.push_back(qmin);
	continue;
	}

	double scale;
	int64_t nudgedZeroPoint;
	getNudgedScaleAndZeroPoint(qmin, qmax, rmin, rmax, scale, nudgedZeroPoint);
	scales.push_back(scale);
	zeroPoints.push_back(nudgedZeroPoint);
	}

	unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
	return UniformQuantizedPerAxisType::getChecked(
	loc, flags, storageType, expressedType, scales, zeroPoints,
	quantizedDimension, qmin, qmax);
	}