//===- TosaToLinalg.cpp - Lowering Tosa to Linalg Dialect -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// These rewriters lower from the Tosa to the Linalg dialect.
//
//===----------------------------------------------------------------------===//
#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include <numeric>
using namespace mlir;
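// Returns a list of `nParallelLoops` parallel iterator-type names, suitable
// for the iterator_types of a linalg.generic op.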
static SmallVector<StringRef> getNParallelLoopsAttrs(unsigned nParallelLoops) {
return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
}
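// Reads the integer attribute named `attrName` from `op`, narrows its value
// through the template type `T`, and materializes it as an arith.constant of
// `requiredAttrType`.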
template <typename T>
static arith::ConstantOp
createConstFromIntAttribute(Operation *op, std::string attrName,
Type requiredAttrType, OpBuilder &rewriter) {
auto castedN = static_cast<T>(
op->getAttr(attrName).cast<IntegerAttr>().getValue().getSExtValue());
return rewriter.create<arith::ConstantOp>(
op->getLoc(), IntegerAttr::get(requiredAttrType, castedN));
}
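// Appends the signed values stored in an integer ArrayAttr to `arrayValues`.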
template <typename T>
static void getValuesFromIntArrayAttribute(ArrayAttr attr,
SmallVector<T> &arrayValues) {
for (Attribute val : attr.getValue()) {
arrayValues.push_back(val.cast<IntegerAttr>().getValue().getSExtValue());
}
}
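// Clamps `arg` to the inclusive range [min, max] using two compare/select
// pairs. `T` is the compare op kind (arith.cmpi or arith.cmpf) and `pred` is
// the corresponding less-than predicate.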
template <typename T, typename P>
static mlir::SelectOp clampHelper(Location loc, Value arg,
arith::ConstantOp min, arith::ConstantOp max,
P pred, OpBuilder &rewriter) {
auto smallerThanMin = rewriter.create<T>(loc, pred, arg, min);
auto minOrArg =
rewriter.create<mlir::SelectOp>(loc, smallerThanMin, min, arg);
auto largerThanMax = rewriter.create<T>(loc, pred, max, arg);
return rewriter.create<mlir::SelectOp>(loc, largerThanMax, max, minOrArg);
}
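// Pads `input` with `padAttr` according to the interleaved low/high padding
// amounts in `pad` (two entries per dimension). Returns the input unchanged
// when every padding amount is zero.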
static mlir::Value applyPad(Location loc, Value input, ArrayRef<int64_t> pad,
Attribute padAttr, OpBuilder &rewriter) {
// Input should be padded if necessary.
if (llvm::all_of(pad, [](int64_t p) { return p == 0; }))
return input;
ShapedType inputTy = input.getType().cast<ShapedType>();
Type inputETy = inputTy.getElementType();
auto inputShape = inputTy.getShape();
assert((inputShape.size() * 2) == pad.size());
SmallVector<int64_t, 4> paddedShape;
SmallVector<OpFoldResult, 8> lowIndices;
SmallVector<OpFoldResult, 8> highIndices;
for (int i = 0, s = inputShape.size(); i < s; i++) {
auto lowPad = pad[i * 2];
auto highPad = pad[i * 2 + 1];
paddedShape.push_back(inputShape[i] + highPad + lowPad);
lowIndices.push_back(rewriter.getIndexAttr(lowPad));
highIndices.push_back(rewriter.getIndexAttr(highPad));
}
Value padValue = rewriter.create<arith::ConstantOp>(loc, padAttr);
return linalg::PadTensorOp::createPadScalarOp(
RankedTensorType::get(paddedShape, inputETy), input, padValue,
lowIndices, highIndices, /*nofold=*/false, loc, rewriter)
.result();
}
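// Drops null entries from a per-dimension list of dynamic-size Values,
// producing the compact operand list expected by linalg.init_tensor.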
static SmallVector<Value> filterDynamicDims(SmallVector<Value> dynDims) {
SmallVector<Value> filteredDims;
for (auto dim : dynDims)
if (dim)
filteredDims.push_back(dim);
return filteredDims;
}
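// Emits the scalar computation that forms the body of the linalg.generic
// produced for a TOSA elementwise op, dispatching on the op kind and element
// type. Returns a null Value (after notifying a match failure) for
// unsupported combinations. For example (illustrative sketch), a float
// tosa.add yields a generic body roughly of the form:
//   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
//     %0 = arith.addf %arg0, %arg1 : f32
//     linalg.yield %0 : f32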
static Value
createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,
ArrayRef<Type> resultTypes,
PatternRewriter &rewriter) {
Location loc = op->getLoc();
auto elementTy =
op->getOperand(0).getType().cast<ShapedType>().getElementType();
// tosa::AbsOp
if (isa<tosa::AbsOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<math::AbsOp>(loc, resultTypes, args);
if (isa<tosa::AbsOp>(op) && elementTy.isa<IntegerType>()) {
auto zero = rewriter.create<arith::ConstantOp>(
loc, rewriter.getZeroAttr(elementTy));
auto cmp = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt,
args[0], zero);
auto neg = rewriter.create<arith::SubIOp>(loc, zero, args[0]);
return rewriter.create<mlir::SelectOp>(loc, cmp, args[0], neg);
}
// tosa::AddOp
if (isa<tosa::AddOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<arith::AddFOp>(loc, resultTypes, args);
if (isa<tosa::AddOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::AddIOp>(loc, resultTypes, args);
// tosa::SubOp
if (isa<tosa::SubOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<arith::SubFOp>(loc, resultTypes, args);
if (isa<tosa::SubOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::SubIOp>(loc, resultTypes, args);
// tosa::MulOp
if (isa<tosa::MulOp>(op) && elementTy.isa<FloatType>()) {
if (cast<tosa::MulOp>(op).shift() != 0) {
(void)rewriter.notifyMatchFailure(op,
"Cannot have shift value for float");
return nullptr;
}
return rewriter.create<arith::MulFOp>(loc, resultTypes, args);
}
// tosa::DivOp
if (isa<tosa::DivOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::DivSIOp>(loc, resultTypes, args);
// tosa::ReciprocalOp
if (isa<tosa::ReciprocalOp>(op) && elementTy.isa<FloatType>()) {
auto one =
rewriter.create<arith::ConstantOp>(loc, FloatAttr::get(elementTy, 1));
return rewriter.create<arith::DivFOp>(loc, resultTypes, one, args[0]);
}
if (isa<tosa::MulOp>(op) && elementTy.isa<IntegerType>()) {
Value a = args[0];
Value b = args[1];
auto shift =
op->getAttr("shift").cast<IntegerAttr>().getValue().getSExtValue();
if (shift > 0) {
auto shiftConst =
rewriter.create<arith::ConstantIntOp>(loc, shift, /*bitwidth=*/8);
if (!a.getType().isInteger(32))
a = rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), a);
if (!b.getType().isInteger(32))
b = rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), b);
auto result = rewriter.create<tosa::ApplyScaleOp>(
loc, rewriter.getI32Type(), a, b, shiftConst,
rewriter.getBoolAttr(false));
if (elementTy.isInteger(32))
return result;
return rewriter.create<arith::TruncIOp>(loc, elementTy, result);
}
int aWidth = a.getType().getIntOrFloatBitWidth();
int bWidth = b.getType().getIntOrFloatBitWidth();
int cWidth = resultTypes[0].getIntOrFloatBitWidth();
if (aWidth < cWidth)
a = rewriter.create<arith::ExtSIOp>(loc, resultTypes[0], a);
if (bWidth < cWidth)
b = rewriter.create<arith::ExtSIOp>(loc, resultTypes[0], b);
return rewriter.create<arith::MulIOp>(loc, resultTypes, a, b);
}
// tosa::NegateOp
if (isa<tosa::NegateOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<arith::NegFOp>(loc, resultTypes, args);
if (isa<tosa::NegateOp>(op) && elementTy.isa<IntegerType>() &&
!cast<tosa::NegateOp>(op).quantization_info()) {
auto constant =
rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(elementTy, 0));
return rewriter.create<arith::SubIOp>(loc, resultTypes, constant, args[0]);
}
if (isa<tosa::NegateOp>(op) && elementTy.isa<IntegerType>() &&
cast<tosa::NegateOp>(op).quantization_info()) {
auto quantizationInfo = cast<tosa::NegateOp>(op).quantization_info();
int32_t inputBitWidth = elementTy.getIntOrFloatBitWidth();
int64_t inZp =
quantizationInfo.getValue().input_zp().getValue().getSExtValue();
int64_t outZp =
quantizationInfo.getValue().output_zp().getValue().getSExtValue();
// Compute the maximum value that can occur in the intermediate buffer.
int64_t zpAdd = inZp + outZp;
int64_t maxValue = APInt::getSignedMaxValue(inputBitWidth).getSExtValue() +
std::abs(zpAdd) + 1;
// Convert that maximum value into the maximum bitwidth needed to represent
// it. We assume 48-bit numbers may be supported further in the pipeline.
int intermediateBitWidth = 64;
if (maxValue <= APInt::getSignedMaxValue(16).getSExtValue()) {
intermediateBitWidth = 16;
} else if (maxValue <= APInt::getSignedMaxValue(32).getSExtValue()) {
intermediateBitWidth = 32;
} else if (maxValue <= APInt::getSignedMaxValue(48).getSExtValue()) {
intermediateBitWidth = 48;
}
Type intermediateType = rewriter.getIntegerType(intermediateBitWidth);
Value zpAddValue = rewriter.create<arith::ConstantOp>(
loc, rewriter.getIntegerAttr(intermediateType, zpAdd));
// The negation can be applied by doing:
// outputValue = inZp + outZp - inputValue
auto ext = rewriter.create<arith::ExtSIOp>(loc, intermediateType, args[0]);
auto sub = rewriter.create<arith::SubIOp>(loc, zpAddValue, ext);
// Clamp to the negation range.
auto min = rewriter.create<arith::ConstantIntOp>(
loc, APInt::getSignedMinValue(inputBitWidth).getSExtValue(),
intermediateType);
auto max = rewriter.create<arith::ConstantIntOp>(
loc, APInt::getSignedMaxValue(inputBitWidth).getSExtValue(),
intermediateType);
auto clamp = clampHelper<arith::CmpIOp>(
loc, sub, min, max, arith::CmpIPredicate::slt, rewriter);
// Truncate to the final value.
return rewriter.create<arith::TruncIOp>(loc, elementTy, clamp);
}
// tosa::BitwiseAndOp
if (isa<tosa::BitwiseAndOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::AndIOp>(loc, resultTypes, args);
// tosa::BitwiseOrOp
if (isa<tosa::BitwiseOrOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::OrIOp>(loc, resultTypes, args);
// tosa::BitwiseNotOp
if (isa<tosa::BitwiseNotOp>(op) && elementTy.isa<IntegerType>()) {
auto allOnesAttr = rewriter.getIntegerAttr(
elementTy, APInt::getAllOnes(elementTy.getIntOrFloatBitWidth()));
auto allOnes = rewriter.create<arith::ConstantOp>(loc, allOnesAttr);
return rewriter.create<arith::XOrIOp>(loc, resultTypes, args[0], allOnes);
}
// tosa::BitwiseXOrOp
if (isa<tosa::BitwiseXorOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::XOrIOp>(loc, resultTypes, args);
// tosa::LogicalLeftShiftOp
if (isa<tosa::LogicalLeftShiftOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::ShLIOp>(loc, resultTypes, args);
// tosa::LogicalRightShiftOp
if (isa<tosa::LogicalRightShiftOp>(op) && elementTy.isa<IntegerType>())
return rewriter.create<arith::ShRUIOp>(loc, resultTypes, args);
// tosa::ArithmeticRightShiftOp
if (isa<tosa::ArithmeticRightShiftOp>(op) && elementTy.isa<IntegerType>()) {
auto result = rewriter.create<arith::ShRSIOp>(loc, resultTypes, args);
auto round = op->getAttr("round").cast<BoolAttr>().getValue();
if (!round) {
return result;
}
Type i1Ty = IntegerType::get(rewriter.getContext(), /*width=*/1);
auto one =
rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(elementTy, 1));
auto zero =
rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(elementTy, 0));
auto i1one =
rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(i1Ty, 1));
// Check whether the shift amount (input2) is greater than zero.
auto shiftValueGreaterThanZero = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::sgt, args[1], zero);
// Check whether the last bit shifted out of input1 (bit input2 - 1) is 1.
auto subtract =
rewriter.create<arith::SubIOp>(loc, resultTypes, args[1], one);
auto shifted =
rewriter.create<arith::ShRSIOp>(loc, resultTypes, args[0], subtract)
->getResults();
auto truncated =
rewriter.create<arith::TruncIOp>(loc, i1Ty, shifted, mlir::None);
auto isInputOdd =
rewriter.create<arith::AndIOp>(loc, i1Ty, truncated, i1one);
auto shouldRound = rewriter.create<arith::AndIOp>(
loc, i1Ty, shiftValueGreaterThanZero, isInputOdd);
auto extended =
rewriter.create<arith::ExtUIOp>(loc, resultTypes, shouldRound);
return rewriter.create<arith::AddIOp>(loc, resultTypes, result, extended);
}
// tosa::ClzOp
if (isa<tosa::ClzOp>(op) && elementTy.isa<IntegerType>()) {
int bitWidth = elementTy.getIntOrFloatBitWidth();
auto zero =
rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(elementTy, 0));
auto leadingZeros = rewriter.create<arith::ConstantOp>(
loc, IntegerAttr::get(elementTy, bitWidth));
SmallVector<Value> operands = {args[0], leadingZeros, zero};
SmallVector<Type> types = {elementTy, elementTy, elementTy};
auto whileOp = rewriter.create<scf::WhileOp>(loc, types, operands);
Block *before = rewriter.createBlock(&whileOp.before(), {}, types);
Block *after = rewriter.createBlock(&whileOp.after(), {}, types);
// The conditional block of the while loop.
{
rewriter.setInsertionPointToStart(&whileOp.before().front());
Value input = before->getArgument(0);
Value zero = before->getArgument(2);
Value inputLargerThanZero = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::ne, input, zero);
rewriter.create<scf::ConditionOp>(loc, inputLargerThanZero,
before->getArguments());
}
// The body of the while loop: shift right until reaching a value of 0.
{
rewriter.setInsertionPointToStart(&whileOp.after().front());
Value input = after->getArgument(0);
Value leadingZeros = after->getArgument(1);
auto one = rewriter.create<arith::ConstantOp>(
loc, IntegerAttr::get(elementTy, 1));
auto shifted =
rewriter.create<arith::ShRUIOp>(loc, resultTypes, input, one);
auto leadingZerosMinusOne =
rewriter.create<arith::SubIOp>(loc, resultTypes, leadingZeros, one);
rewriter.create<scf::YieldOp>(
loc,
ValueRange({shifted, leadingZerosMinusOne, after->getArgument(2)}));
}
rewriter.setInsertionPointAfter(whileOp);
return whileOp->getResult(1);
}
// tosa::LogicalAnd
if (isa<tosa::LogicalAndOp>(op) && elementTy.isInteger(1))
return rewriter.create<arith::AndIOp>(loc, resultTypes, args);
// tosa::LogicalNot
if (isa<tosa::LogicalNotOp>(op) && elementTy.isInteger(1)) {
auto one = rewriter.create<arith::ConstantOp>(
loc, rewriter.getIntegerAttr(elementTy, 1));
return rewriter.create<arith::XOrIOp>(loc, resultTypes, args[0], one);
}
// tosa::LogicalOr
if (isa<tosa::LogicalOrOp>(op) && elementTy.isInteger(1))
return rewriter.create<arith::OrIOp>(loc, resultTypes, args);
// tosa::LogicalXor
if (isa<tosa::LogicalXorOp>(op) && elementTy.isInteger(1))
return rewriter.create<arith::XOrIOp>(loc, resultTypes, args);
// tosa::PowOp
if (isa<tosa::PowOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<mlir::math::PowFOp>(loc, resultTypes, args);
// tosa::RsqrtOp
if (isa<tosa::RsqrtOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<mlir::math::RsqrtOp>(loc, resultTypes, args);
// tosa::LogOp
if (isa<tosa::LogOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<mlir::math::LogOp>(loc, resultTypes, args);
// tosa::ExpOp
if (isa<tosa::ExpOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<mlir::math::ExpOp>(loc, resultTypes, args);
// tosa::TanhOp
if (isa<tosa::TanhOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<mlir::math::TanhOp>(loc, resultTypes, args);
// tosa::GreaterOp
if (isa<tosa::GreaterOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT,
args[0], args[1]);
if (isa<tosa::GreaterOp>(op) && elementTy.isSignlessInteger())
return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt,
args[0], args[1]);
// tosa::GreaterEqualOp
if (isa<tosa::GreaterEqualOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE,
args[0], args[1]);
if (isa<tosa::GreaterEqualOp>(op) && elementTy.isSignlessInteger())
return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sge,
args[0], args[1]);
// tosa::EqualOp
if (isa<tosa::EqualOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ,
args[0], args[1]);
if (isa<tosa::EqualOp>(op) && elementTy.isSignlessInteger())
return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
args[0], args[1]);
// tosa::SelectOp
if (isa<tosa::SelectOp>(op)) {
elementTy = op->getOperand(1).getType().cast<ShapedType>().getElementType();
if (elementTy.isa<FloatType>() || elementTy.isa<IntegerType>())
return rewriter.create<mlir::SelectOp>(loc, args[0], args[1], args[2]);
}
// tosa::MaximumOp
if (isa<tosa::MaximumOp>(op) && elementTy.isa<FloatType>()) {
auto predicate = rewriter.create<arith::CmpFOp>(
loc, arith::CmpFPredicate::OGT, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
if (isa<tosa::MaximumOp>(op) && elementTy.isSignlessInteger()) {
auto predicate = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::sgt, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
// tosa::MinimumOp
if (isa<tosa::MinimumOp>(op) && elementTy.isa<FloatType>()) {
auto predicate = rewriter.create<arith::CmpFOp>(
loc, arith::CmpFPredicate::OLT, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
if (isa<tosa::MinimumOp>(op) && elementTy.isSignlessInteger()) {
auto predicate = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::slt, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
// tosa::CeilOp
if (isa<tosa::CeilOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<math::CeilOp>(loc, resultTypes, args);
// tosa::FloorOp
if (isa<tosa::FloorOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<math::FloorOp>(loc, resultTypes, args);
// tosa::ClampOp
if (isa<tosa::ClampOp>(op) && elementTy.isa<FloatType>()) {
auto min = rewriter.create<arith::ConstantOp>(loc, elementTy,
op->getAttr("min_fp"));
auto max = rewriter.create<arith::ConstantOp>(loc, elementTy,
op->getAttr("max_fp"));
return clampHelper<arith::CmpFOp>(loc, args[0], min, max,
arith::CmpFPredicate::OLT, rewriter);
}
if (isa<tosa::ClampOp>(op) && elementTy.isa<IntegerType>()) {
auto intTy = elementTy.cast<IntegerType>();
int32_t min = static_cast<int32_t>(
op->getAttr("min_int").cast<IntegerAttr>().getValue().getSExtValue());
int32_t max = static_cast<int32_t>(
op->getAttr("max_int").cast<IntegerAttr>().getValue().getSExtValue());
if (intTy.isUnsignedInteger()) {
min = std::max<int32_t>(min, 0);
max = std::min<int32_t>(
max,
APInt::getMaxValue(intTy.getIntOrFloatBitWidth()).getSExtValue());
} else {
min = std::max<int32_t>(
min, APInt::getSignedMinValue(intTy.getIntOrFloatBitWidth())
.getSExtValue());
max = std::min<int32_t>(
max, APInt::getSignedMaxValue(intTy.getIntOrFloatBitWidth())
.getSExtValue());
}
auto minVal = rewriter.create<arith::ConstantIntOp>(
loc, min, intTy.getIntOrFloatBitWidth());
auto maxVal = rewriter.create<arith::ConstantIntOp>(
loc, max, intTy.getIntOrFloatBitWidth());
return clampHelper<arith::CmpIOp>(loc, args[0], minVal, maxVal,
arith::CmpIPredicate::slt, rewriter);
}
// tosa::ReluNOp
if (isa<tosa::ReluNOp>(op) && elementTy.isa<FloatType>()) {
auto zero =
rewriter.create<arith::ConstantOp>(loc, FloatAttr::get(elementTy, 0));
auto n = rewriter.create<arith::ConstantOp>(loc, elementTy,
op->getAttr("max_fp"));
return clampHelper<arith::CmpFOp>(loc, args[0], zero, n,
arith::CmpFPredicate::OLT, rewriter);
}
if (isa<tosa::ReluNOp>(op) && elementTy.isa<IntegerType>()) {
auto zero =
rewriter.create<arith::ConstantOp>(loc, IntegerAttr::get(elementTy, 0));
auto n = createConstFromIntAttribute<int32_t>(op, "max_int", elementTy,
rewriter);
return clampHelper<arith::CmpIOp>(loc, args[0], zero, n,
arith::CmpIPredicate::slt, rewriter);
}
// tosa::SigmoidOp
if (isa<tosa::SigmoidOp>(op) && elementTy.isa<FloatType>()) {
auto one =
rewriter.create<arith::ConstantOp>(loc, FloatAttr::get(elementTy, 1));
auto negate = rewriter.create<arith::NegFOp>(loc, resultTypes, args[0]);
auto exp = rewriter.create<mlir::math::ExpOp>(loc, resultTypes, negate);
auto added = rewriter.create<arith::AddFOp>(loc, resultTypes, exp, one);
return rewriter.create<arith::DivFOp>(loc, resultTypes, one, added);
}
// tosa::CastOp
if (isa<tosa::CastOp>(op)) {
Type srcTy = elementTy;
Type dstTy = resultTypes.front();
bool bitExtend =
srcTy.getIntOrFloatBitWidth() < dstTy.getIntOrFloatBitWidth();
if (srcTy == dstTy)
return args.front();
if (srcTy.isa<FloatType>() && dstTy.isa<FloatType>() && bitExtend)
return rewriter.create<arith::ExtFOp>(loc, resultTypes, args, mlir::None);
if (srcTy.isa<FloatType>() && dstTy.isa<FloatType>() && !bitExtend)
return rewriter.create<arith::TruncFOp>(loc, resultTypes, args,
mlir::None);
// 1-bit integers need to be treated as signless.
if (srcTy.isInteger(1) && arith::UIToFPOp::areCastCompatible(srcTy, dstTy))
return rewriter.create<arith::UIToFPOp>(loc, resultTypes, args,
mlir::None);
if (srcTy.isInteger(1) && dstTy.isa<IntegerType>() && bitExtend)
return rewriter.create<arith::ExtUIOp>(loc, resultTypes, args,
mlir::None);
// Unsigned integers need an unrealized cast so that they can be passed
// to UIToFP.
if (srcTy.isUnsignedInteger() && dstTy.isa<FloatType>()) {
auto unrealizedCast =
rewriter
.create<UnrealizedConversionCastOp>(
loc, rewriter.getIntegerType(srcTy.getIntOrFloatBitWidth()),
args[0])
.getResult(0);
return rewriter.create<arith::UIToFPOp>(loc, resultTypes[0],
unrealizedCast);
}
// All other si-to-fp conversions should be handled by SIToFP.
if (arith::SIToFPOp::areCastCompatible(srcTy, dstTy))
return rewriter.create<arith::SIToFPOp>(loc, resultTypes, args,
mlir::None);
// When casting to a boolean, floats only need to be checked as not equal to
// zero.
if (srcTy.isa<FloatType>() && dstTy.isInteger(1)) {
Value zero = rewriter.create<arith::ConstantOp>(
loc, rewriter.getFloatAttr(srcTy, 0.0));
return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::UNE,
args.front(), zero);
}
if (arith::FPToSIOp::areCastCompatible(srcTy, dstTy)) {
auto zero = rewriter.create<arith::ConstantOp>(
loc, rewriter.getF32FloatAttr(0.0f));
auto half = rewriter.create<arith::ConstantOp>(
loc, rewriter.getF32FloatAttr(0.5f));
auto intMin = rewriter.create<arith::ConstantOp>(
loc, rewriter.getF32FloatAttr(
APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth())
.getSExtValue()));
auto intMax = rewriter.create<arith::ConstantOp>(
loc, rewriter.getF32FloatAttr(
APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth())
.getSExtValue()));
auto added = rewriter.create<arith::AddFOp>(loc, args[0], half);
auto subbed = rewriter.create<arith::SubFOp>(loc, args[0], half);
auto negative = rewriter.create<arith::CmpFOp>(
loc, arith::CmpFPredicate::OLT, args[0], zero);
auto rounded =
rewriter.create<mlir::SelectOp>(loc, negative, subbed, added);
auto clamped = clampHelper<arith::CmpFOp>(
loc, rounded, intMin, intMax, arith::CmpFPredicate::OLT, rewriter);
return rewriter.create<arith::FPToSIOp>(loc, dstTy, clamped);
}
// When casting to a boolean, integers only need to be checked as not equal
// to zero.
if (srcTy.isa<IntegerType>() && dstTy.isInteger(1)) {
Value zero = rewriter.create<arith::ConstantIntOp>(
loc, 0, srcTy.getIntOrFloatBitWidth());
return rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ne,
args.front(), zero);
}
if (srcTy.isa<IntegerType>() && dstTy.isa<IntegerType>() && bitExtend)
return rewriter.create<arith::ExtSIOp>(loc, resultTypes, args,
mlir::None);
if (srcTy.isa<IntegerType>() && dstTy.isa<IntegerType>() && !bitExtend) {
auto intMin = rewriter.create<arith::ConstantIntOp>(
loc,
APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth())
.getSExtValue(),
srcTy.getIntOrFloatBitWidth());
auto intMax = rewriter.create<arith::ConstantIntOp>(
loc,
APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth())
.getSExtValue(),
srcTy.getIntOrFloatBitWidth());
auto clamped = clampHelper<arith::CmpIOp>(
loc, args[0], intMin, intMax, arith::CmpIPredicate::slt, rewriter);
return rewriter.create<arith::TruncIOp>(loc, dstTy, clamped);
}
}
(void)rewriter.notifyMatchFailure(
op, "unhandled op for linalg body calculation for elementwise op");
return nullptr;
}
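// Converts a TOSA elementwise op into a linalg.generic: collects the dynamic
// dimensions of the result, builds an init tensor per result, constructs
// broadcasting indexing maps for the inputs (reshaping away size-1 broadcast
// dimensions when needed), and fills the body via
// createLinalgBodyCalculationForElementwiseOp.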
static LogicalResult
elementwiseMatchAndRewriteHelper(Operation *operation,
PatternRewriter &rewriter) {
auto loc = operation->getLoc();
assert(operation->getNumResults() == 1 &&
"All TOSA elementwise ops should only return a single result.");
auto results = operation->getResults();
auto resultTy = operation->getResult(0).getType().dyn_cast<ShapedType>();
if (!resultTy)
return rewriter.notifyMatchFailure(operation,
"All results must be a shaped type");
unsigned rank = resultTy.getRank();
// Construct the indexing maps needed for linalg.generic ops.
SmallVector<Type> bodyArgTypes;
for (Value in : operation->getOperands())
bodyArgTypes.emplace_back(getElementTypeOrSelf(in.getType()));
SmallVector<Type> opResultTypes;
SmallVector<Value> initTensors;
SmallVector<Value> dynDims;
dynDims.resize(results.front().getType().cast<ShapedType>().getRank());
for (auto arg : operation->getOperands()) {
auto operandTy = arg.getType().cast<ShapedType>();
for (int i = 0; i < operandTy.getRank(); i++) {
if (operandTy.isDynamicDim(i) && !dynDims[i])
dynDims[i] = rewriter.create<tensor::DimOp>(loc, arg, i);
}
}
SmallVector<Value> filteredDims = filterDynamicDims(dynDims);
for (auto result : results) {
auto resultTy = result.getType().template cast<ShapedType>();
initTensors.push_back(rewriter.create<linalg::InitTensorOp>(
loc, filteredDims, resultTy.getShape(), resultTy.getElementType()));
opResultTypes.push_back(result.getType());
}
auto bodyResultTypes = llvm::to_vector<4>(llvm::map_range(
initTensors, [](Value v) { return getElementTypeOrSelf(v); }));
SmallVector<Value, 2> operands;
SmallVector<AffineMap, 2> indexingMaps;
indexingMaps.reserve(operation->getNumOperands() + bodyResultTypes.size());
// Input indexing maps may be broadcasted.
for (Value operand : operation->getOperands()) {
ShapedType type = operand.getType().cast<ShapedType>();
if (type.getShape() == resultTy.getShape()) {
operands.push_back(operand);
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank));
continue;
}
SmallVector<int64_t, 5> newShape;
SmallVector<AffineExpr, 4> affineExprs;
newShape.reserve(type.getRank());
for (auto it : llvm::enumerate(type.getShape())) {
if (it.value() == resultTy.getDimSize(it.index())) {
newShape.push_back(it.value());
affineExprs.push_back(
mlir::getAffineDimExpr(it.index(), rewriter.getContext()));
}
}
if (newShape.size() != rank) {
operand = rewriter.create<tosa::ReshapeOp>(
loc, RankedTensorType::get(newShape, type.getElementType()), operand,
rewriter.getI64ArrayAttr(newShape));
}
operands.push_back(operand);
indexingMaps.push_back(AffineMap::get(
/*dimCount=*/type.getRank(), /*symbolCount=*/0, affineExprs,
rewriter.getContext()));
}
indexingMaps.append(operation->getNumResults(),
rewriter.getMultiDimIdentityMap(rank));
bool didEncounterError = false;
auto linalgOp = rewriter.create<linalg::GenericOp>(
loc, opResultTypes, operands, initTensors, indexingMaps,
getNParallelLoopsAttrs(rank),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) {
Value opResult = createLinalgBodyCalculationForElementwiseOp(
operation, blockArgs.take_front(operation->getNumOperands()),
bodyResultTypes, rewriter);
if (!opResult) {
didEncounterError = true;
return;
}
nestedBuilder.create<linalg::YieldOp>(loc, opResult);
});
if (didEncounterError)
return failure();
rewriter.replaceOp(operation, linalgOp->getResults());
return success();
}
// Returns the constant initial value for a given reduction operation. The
// attribute type varies depending on the element type required.
static Attribute createInitialValueForReduceOp(Operation *op, Type elementTy,
PatternRewriter &rewriter) {
if (isa<tosa::ReduceSumOp>(op) && elementTy.isa<FloatType>())
return rewriter.getFloatAttr(elementTy, 0.0);
if (isa<tosa::ReduceSumOp>(op) && elementTy.isa<IntegerType>())
return rewriter.getIntegerAttr(elementTy, 0);
if (isa<tosa::ReduceProdOp>(op) && elementTy.isa<FloatType>())
return rewriter.getFloatAttr(elementTy, 1.0);
if (isa<tosa::ReduceProdOp>(op) && elementTy.isa<IntegerType>())
return rewriter.getIntegerAttr(elementTy, 1);
if (isa<tosa::ReduceMinOp>(op) && elementTy.isa<FloatType>())
return rewriter.getFloatAttr(
elementTy, APFloat::getLargest(
elementTy.cast<FloatType>().getFloatSemantics(), false));
if (isa<tosa::ReduceMinOp>(op) && elementTy.isa<IntegerType>())
return rewriter.getIntegerAttr(
elementTy, APInt::getSignedMaxValue(elementTy.getIntOrFloatBitWidth()));
if (isa<tosa::ReduceMaxOp>(op) && elementTy.isa<FloatType>())
return rewriter.getFloatAttr(
elementTy, APFloat::getLargest(
elementTy.cast<FloatType>().getFloatSemantics(), true));
if (isa<tosa::ReduceMaxOp>(op) && elementTy.isa<IntegerType>())
return rewriter.getIntegerAttr(
elementTy, APInt::getSignedMinValue(elementTy.getIntOrFloatBitWidth()));
if (isa<tosa::ReduceAllOp>(op) && elementTy.isInteger(1))
return rewriter.getIntegerAttr(elementTy, APInt::getAllOnes(1));
if (isa<tosa::ReduceAnyOp>(op) && elementTy.isInteger(1))
return rewriter.getIntegerAttr(elementTy, APInt::getZero(1));
if (isa<tosa::ArgMaxOp>(op) && elementTy.isa<FloatType>())
return rewriter.getFloatAttr(
elementTy, APFloat::getLargest(
elementTy.cast<FloatType>().getFloatSemantics(), true));
if (isa<tosa::ArgMaxOp>(op) && elementTy.isa<IntegerType>())
return rewriter.getIntegerAttr(
elementTy, APInt::getSignedMinValue(elementTy.getIntOrFloatBitWidth()));
return {};
}
// Creates the body calculation for a reduction. The operations vary depending
// on the input type.
static Value createLinalgBodyCalculationForReduceOp(Operation *op,
ValueRange args,
Type elementTy,
PatternRewriter &rewriter) {
Location loc = op->getLoc();
if (isa<tosa::ReduceSumOp>(op) && elementTy.isa<FloatType>()) {
return rewriter.create<arith::AddFOp>(loc, args);
}
if (isa<tosa::ReduceSumOp>(op) && elementTy.isa<IntegerType>()) {
return rewriter.create<arith::AddIOp>(loc, args);
}
if (isa<tosa::ReduceProdOp>(op) && elementTy.isa<FloatType>()) {
return rewriter.create<arith::MulFOp>(loc, args);
}
if (isa<tosa::ReduceProdOp>(op) && elementTy.isa<IntegerType>()) {
return rewriter.create<arith::MulIOp>(loc, args);
}
if (isa<tosa::ReduceMinOp>(op) && elementTy.isa<FloatType>()) {
auto predicate = rewriter.create<arith::CmpFOp>(
loc, arith::CmpFPredicate::OLT, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
if (isa<tosa::ReduceMinOp>(op) && elementTy.isa<IntegerType>()) {
auto predicate = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::slt, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
if (isa<tosa::ReduceMaxOp>(op) && elementTy.isa<FloatType>()) {
auto predicate = rewriter.create<arith::CmpFOp>(
loc, arith::CmpFPredicate::OGT, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
if (isa<tosa::ReduceMaxOp>(op) && elementTy.isa<IntegerType>()) {
auto predicate = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::sgt, args[0], args[1]);
return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
}
if (isa<tosa::ReduceAllOp>(op) && elementTy.isInteger(1))
return rewriter.create<arith::AndIOp>(loc, args);
if (isa<tosa::ReduceAnyOp>(op) && elementTy.isInteger(1))
return rewriter.create<arith::OrIOp>(loc, args);
return {};
}
// Performs the match and rewrite for reduction operations. This includes
// declaring a correctly sized initial value, and the linalg.generic operation
// that reduces across the specified axis.
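// For example (illustrative), reducing a tensor<4x5xf32> with tosa.reduce_sum
// along axis 1 produces a linalg.generic with iterator types
// [parallel, reduction] writing into a tensor<4xf32>, which is then reshaped
// back to the rank-preserving tensor<4x1xf32> result.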
static LogicalResult reduceMatchAndRewriteHelper(Operation *op, uint64_t axis,
PatternRewriter &rewriter) {
auto loc = op->getLoc();
auto inputTy = op->getOperand(0).getType().template cast<ShapedType>();
auto resultTy = op->getResult(0).getType().template cast<ShapedType>();
auto elementTy = resultTy.getElementType();
Value input = op->getOperand(0);
llvm::SmallVector<int64_t> reduceShape;
for (unsigned i = 0; i < inputTy.getRank(); i++) {
if (axis != i)
reduceShape.push_back(inputTy.getDimSize(i));
}
Type reduceTy = RankedTensorType::get(reduceShape, resultTy.getElementType());
// First fill the output buffer with the init value.
auto initTensor =
rewriter
.create<linalg::InitTensorOp>(loc, ArrayRef<Value>({}), reduceShape,
resultTy.getElementType())
.result();
auto fillValueAttr = createInitialValueForReduceOp(op, elementTy, rewriter);
if (!fillValueAttr)
return rewriter.notifyMatchFailure(
op, "No initial value found for reduction operation");
auto fillValue = rewriter.create<arith::ConstantOp>(loc, fillValueAttr);
auto filledTensor =
rewriter.create<linalg::FillOp>(loc, fillValue, initTensor).result();
SmallVector<AffineExpr, 2> srcExprs;
SmallVector<AffineExpr, 2> dstExprs;
SmallVector<StringRef, 4> iteratorTypes;
for (unsigned int i = 0, rank = inputTy.getRank(); i != rank; ++i) {
srcExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext()));
iteratorTypes.push_back(axis == i ? getReductionIteratorTypeName()
: getParallelIteratorTypeName());
if (axis != i)
dstExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext()));
}
bool didEncounterError = false;
auto maps = AffineMap::inferFromExprList({srcExprs, dstExprs});
auto linalgOp = rewriter.create<linalg::GenericOp>(
loc, reduceTy, input, filledTensor, maps, iteratorTypes,
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) {
auto result = createLinalgBodyCalculationForReduceOp(
op, blockArgs, elementTy, rewriter);
if (!result)
didEncounterError = true;
nestedBuilder.create<linalg::YieldOp>(loc, result);
});
if (didEncounterError)
return failure();
rewriter.replaceOpWithNewOp<tosa::ReshapeOp>(op, resultTy,
linalgOp.getResults());
return success();
}
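// Computes a shape that both `lhsShape` and `rhsShape` can be collapsed to,
// if one exists. For example (illustrative), lhsShape = [2, 3, 4] and
// rhsShape = [6, 4] yield the intermediate shape [6, 4]. When `isDynamic` is
// set, the intermediate shape is currently always the rank-1 dynamic shape.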
static bool findIntermediateShape(ArrayRef<int64_t> lhsShape,
ArrayRef<int64_t> rhsShape,
SmallVector<int64_t> &intermediateShape,
bool isDynamic) {
if (isDynamic) {
// TODO (natashaknk): Make dynamic intermediate shape not always be rank-1
intermediateShape = {-1};
return true;
}
if (lhsShape.empty() || rhsShape.empty()) {
intermediateShape = {};
return true;
}
unsigned currLhsDim = 0, currRhsDim = 0;
while (currLhsDim < lhsShape.size() && currRhsDim < rhsShape.size()) {
int64_t rhsSize = rhsShape[currRhsDim];
int64_t lhsSize = lhsShape[currLhsDim];
while (lhsSize != rhsSize && currLhsDim < lhsShape.size() &&
currRhsDim < rhsShape.size()) {
if (lhsSize < rhsSize) {
currLhsDim++;
lhsSize *= lhsShape[currLhsDim];
} else {
currRhsDim++;
rhsSize *= rhsShape[currRhsDim];
}
}
if (lhsSize == rhsSize) {
intermediateShape.push_back(lhsSize);
}
currRhsDim++;
currLhsDim++;
}
// If the iterators didn't reach the end and their leftover dimensions are not
// equal to 1, an intermediate shape was not found.
while (currLhsDim < lhsShape.size()) {
if (lhsShape[currLhsDim++] != 1) {
return false;
}
}
while (currRhsDim < rhsShape.size()) {
if (rhsShape[currRhsDim++] != 1) {
return false;
}
}
return true;
}
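// Builds the reassociation map that collapses `srcShape` into `dstShape`.
// For example (illustrative), collapsing [2, 3, 4] into [6, 4] produces the
// reassociation [[d0, d1], [d2]]. Returns false when the two shapes are not
// compatible.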
static bool createReassociationMapsForCollapse(
PatternRewriter &rewriter, ArrayRef<int64_t> srcShape,
ArrayRef<int64_t> dstShape,
SmallVector<ReassociationExprs, 4> &reassociationMap, bool isDynamic) {
// If the shape is dynamic, create a map for collapsing into one dimension.
if (isDynamic) {
SmallVector<AffineExpr, 2> exprs;
for (int i = 0, s = srcShape.size(); i < s; ++i)
exprs.push_back(rewriter.getAffineDimExpr(i));
reassociationMap = {exprs};
return true;
}
if (dstShape.empty()) {
reassociationMap = {};
return true;
}
reassociationMap.resize(dstShape.size());
unsigned currSrcDim = 0, currDstDim = 0;
while (currSrcDim < srcShape.size() && currDstDim < dstShape.size()) {
int64_t dstSize = dstShape[currDstDim];
int64_t srcSize = srcShape[currSrcDim];
while (srcSize < dstSize && currSrcDim < srcShape.size()) {
reassociationMap[currDstDim].push_back(
rewriter.getAffineDimExpr(currSrcDim++));
srcSize *= srcShape[currSrcDim];
}
if (srcSize == dstSize) {
reassociationMap[currDstDim].push_back(
rewriter.getAffineDimExpr(currSrcDim++));
// If the next dim in dstShape is not 1, fold any subsequent size-1 dims of
// srcShape into the current group.
if (currDstDim == dstShape.size() - 1 || dstShape[currDstDim + 1] != 1) {
while (currSrcDim < srcShape.size() && srcShape[currSrcDim] == 1) {
reassociationMap[currDstDim].push_back(
rewriter.getAffineDimExpr(currSrcDim++));
}
}
}
currDstDim++;
}
// If either iterator did not reach the end, there are leftover dimensions,
// which implies a mismatch in shape.
if (currSrcDim != srcShape.size() || currDstDim != dstShape.size()) {
return false;
}
return true;
}
namespace {
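// Rewrites a supported TOSA elementwise op (the template parameter) into a
// linalg.generic via elementwiseMatchAndRewriteHelper.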
template <typename SrcOp>
class PointwiseConverter : public OpRewritePattern<SrcOp> {
public:
using OpRewritePattern<SrcOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SrcOp op,
PatternRewriter &rewriter) const final {
return elementwiseMatchAndRewriteHelper(op, rewriter);
}
};
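// Lowers tosa.conv2d to linalg. The input is padded as needed, the kernel is
// transposed into HWCF ordering with a tosa.transpose, and the convolution
// becomes linalg.conv_2d_nhwc_hwcf (or its quantized variant); a broadcasting
// linalg.generic then adds the bias.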
class ConvConverter : public OpConversionPattern<tosa::Conv2DOp> {
public:
using OpConversionPattern<tosa::Conv2DOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::Conv2DOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
Location loc = op->getLoc();
Value input = op->getOperand(0);
Value weight = op->getOperand(1);
Value bias = op->getOperand(2);
ShapedType inputTy = input.getType().cast<ShapedType>();
ShapedType weightTy = weight.getType().cast<ShapedType>();
ShapedType biasTy = bias.getType().cast<ShapedType>();
ShapedType resultTy = op->getResult(0).getType().cast<ShapedType>();
Type inputETy = inputTy.getElementType();
Type resultETy = resultTy.getElementType();
auto padAttr = op->getAttr("pad").cast<ArrayAttr>();
auto strideTosaAttr = op->getAttr("stride").cast<ArrayAttr>();
auto dilationTosaAttr = op->getAttr("dilation").cast<ArrayAttr>();
bool isQuantized = op->hasAttr("quantization_info");
if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() ||
!biasTy.hasStaticShape() || !resultTy.hasStaticShape())
return rewriter.notifyMatchFailure(op,
"tosa.conv ops require static shapes");
if (inputETy.isUnsignedInteger())
return rewriter.notifyMatchFailure(
op, "tosa.conv ops does not support unsigned integer input");
auto weightShape = weightTy.getShape();
// Apply padding as necessary.
Attribute zeroAttr = rewriter.getZeroAttr(inputETy);
if (isQuantized) {
auto quantizationInfo =
op->getAttr("quantization_info").cast<tosa::ConvOpQuantizationAttr>();
auto iZp = quantizationInfo.input_zp().getValue().getSExtValue();
int64_t intMin =
APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth())
.getSExtValue();
int64_t intMax =
APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth())
.getSExtValue();
if (iZp < intMin || iZp > intMax)
return rewriter.notifyMatchFailure(
op, "tosa.conv op quantization has zp outside of input range");
zeroAttr = rewriter.getIntegerAttr(inputETy, iZp);
}
llvm::SmallVector<int64_t> pad;
pad.resize(2, 0);
getValuesFromIntArrayAttribute(padAttr, pad);
pad.resize(pad.size() + 2, 0);
input = applyPad(loc, input, pad, zeroAttr, rewriter);
// Transpose the kernel to match dimension ordering of the linalg
// convolution operation.
// TODO(suderman): See if this can be efficiently folded - check whether
// the input is used anywhere else; if not, fold the constant.
SmallVector<int64_t> weightPerm{1, 2, 3, 0};
SmallVector<int64_t> newWeightShape{weightShape[1], weightShape[2],
weightShape[3], weightShape[0]};
auto weightPermAttr = DenseIntElementsAttr::get(
RankedTensorType::get({4}, rewriter.getI64Type()), weightPerm);
Value weightPermValue =
rewriter.create<arith::ConstantOp>(loc, weightPermAttr);
Type newWeightTy =
RankedTensorType::get(newWeightShape, weightTy.getElementType());
weight = rewriter.create<tosa::TransposeOp>(loc, newWeightTy, weight,
weightPermValue);
Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy);
Value initTensor = rewriter.create<linalg::InitTensorOp>(
loc, resultTy.getShape(), resultETy);
Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
Value zeroTensor =
rewriter.create<linalg::FillOp>(loc, zero, initTensor).getResult(0);
// Extract the attributes for convolution.
llvm::SmallVector<int64_t> stride, dilation;
getValuesFromIntArrayAttribute(strideTosaAttr, stride);
getValuesFromIntArrayAttribute(dilationTosaAttr, dilation);
// Create the convolution op.
auto strideAttr = DenseIntElementsAttr::get(
RankedTensorType::get({2}, rewriter.getI64Type()), stride);
auto dilationAttr = DenseIntElementsAttr::get(
RankedTensorType::get({2}, rewriter.getI64Type()), dilation);
// Create maps for the bias broadcasting
SmallVector<AffineMap, 4> indexingMaps;
indexingMaps.push_back(AffineMap::get(
/*dimCount=*/resultTy.getRank(), /*symbolCount=*/0,
{rewriter.getAffineDimExpr(3)}, rewriter.getContext()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
Value biasInitTensor = rewriter.create<linalg::InitTensorOp>(
loc, resultTy.getShape(), resultETy);
if (isQuantized) {
auto quantizationInfo =
op->getAttr("quantization_info").cast<tosa::ConvOpQuantizationAttr>();
auto iZp = rewriter.getI32IntegerAttr(
quantizationInfo.input_zp().getValue().getSExtValue());
auto kZp = rewriter.getI32IntegerAttr(
quantizationInfo.weight_zp().getValue().getSExtValue());
auto iZpVal = rewriter.create<arith::ConstantOp>(loc, iZp);
auto kZpVal = rewriter.create<arith::ConstantOp>(loc, kZp);
Value conv =
rewriter
.create<linalg::Conv2DNhwcHwcfQOp>(
loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal},
ValueRange{zeroTensor}, strideAttr, dilationAttr)
->getResult(0);
Value result =
rewriter
.create<linalg::GenericOp>(
loc, resultTy, ValueRange({bias, conv}), biasInitTensor,
indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value added = nestedBuilder.create<arith::AddIOp>(
loc, args[0], args[1]);
nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
})
.getResult(0);
rewriter.replaceOp(op, result);
return success();
}
Value conv = rewriter
.create<linalg::Conv2DNhwcHwcfOp>(
loc, resultTy, ValueRange{input, weight},
ValueRange{zeroTensor}, strideAttr, dilationAttr)
->getResult(0);
Value result =
rewriter
.create<linalg::GenericOp>(
loc, resultTy, ValueRange({bias, conv}), biasInitTensor,
indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value added = nestedBuilder.create<arith::AddFOp>(
loc, args[0], args[1]);
nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
})
.getResult(0);
rewriter.replaceOp(op, result);
return success();
}
};
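// Lowers tosa.depthwise_conv2d to linalg.depthwise_conv_2d_nhwc_hwcm (or its
// quantized variant). The channel-multiplier dimension stays separate during
// the convolution and is folded back into the channel dimension with a
// tosa.reshape before the bias is added.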
class DepthwiseConvConverter
: public OpConversionPattern<tosa::DepthwiseConv2DOp> {
public:
using OpConversionPattern<tosa::DepthwiseConv2DOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
Location loc = op->getLoc();
Value input = op->getOperand(0);
Value weight = op->getOperand(1);
Value bias = op->getOperand(2);
ShapedType inputTy = input.getType().cast<ShapedType>();
ShapedType weightTy = weight.getType().cast<ShapedType>();
ShapedType biasTy = bias.getType().cast<ShapedType>();
ShapedType resultTy = op->getResult(0).getType().cast<ShapedType>();
Type inputETy = inputTy.getElementType();
Type resultETy = resultTy.getElementType();
auto padAttr = op->getAttr("pad").cast<ArrayAttr>();
auto strideTosaAttr = op->getAttr("stride").cast<ArrayAttr>();
auto dilationTosaAttr = op->getAttr("dilation").cast<ArrayAttr>();
bool isQuantized = op->hasAttr("quantization_info");
IntegerAttr iZp;
IntegerAttr kZp;
if (isQuantized) {
auto quantizationInfo =
op->getAttr("quantization_info").cast<tosa::ConvOpQuantizationAttr>();
iZp = rewriter.getI32IntegerAttr(
quantizationInfo.input_zp().getValue().getSExtValue());
kZp = rewriter.getI32IntegerAttr(
quantizationInfo.weight_zp().getValue().getSExtValue());
}
if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() ||
!biasTy.hasStaticShape() || !resultTy.hasStaticShape())
return rewriter.notifyMatchFailure(op,
"tosa.conv ops require static shapes");
auto weightShape = weightTy.getShape();
auto resultShape = resultTy.getShape();
// Apply padding as necessary.
Attribute zeroAttr = rewriter.getZeroAttr(inputETy);
if (isQuantized) {
auto quantizationInfo =
op->getAttr("quantization_info").cast<tosa::ConvOpQuantizationAttr>();
auto iZp = quantizationInfo.input_zp().getValue().getSExtValue();
int64_t intMin =
APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth())
.getSExtValue();
int64_t intMax =
APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth())
.getSExtValue();
if (iZp < intMin || iZp > intMax)
return rewriter.notifyMatchFailure(
op, "tosa.depthwise_conv op quantization has zp outside of input "
"range");
zeroAttr = rewriter.getIntegerAttr(inputETy, iZp);
}
llvm::SmallVector<int64_t> pad;
pad.resize(2, 0);
getValuesFromIntArrayAttribute(padAttr, pad);
pad.resize(pad.size() + 2, 0);
input = applyPad(loc, input, pad, zeroAttr, rewriter);
// Extract the attributes for convolution.
llvm::SmallVector<int64_t> stride, dilation;
getValuesFromIntArrayAttribute(strideTosaAttr, stride);
getValuesFromIntArrayAttribute(dilationTosaAttr, dilation);
// Create the convolution op.
auto strideAttr = DenseIntElementsAttr::get(
RankedTensorType::get({2}, rewriter.getI64Type()), stride);
auto dilationAttr = DenseIntElementsAttr::get(
RankedTensorType::get({2}, rewriter.getI64Type()), dilation);
ShapedType linalgConvTy =
RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2],
weightShape[2], weightShape[3]},
resultETy);
// Broadcast the initial value to the output tensor before convolving.
SmallVector<AffineMap, 4> indexingMaps;
indexingMaps.push_back(AffineMap::get(
/*dimCount=*/resultTy.getRank(), /*symbolCount=*/0,
{rewriter.getAffineDimExpr(3)}, rewriter.getContext()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank()));
Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy);
Value initTensor = rewriter.create<linalg::InitTensorOp>(
loc, linalgConvTy.getShape(), resultETy);
Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
Value zeroTensor =
rewriter.create<linalg::FillOp>(loc, zero, initTensor).getResult(0);
Value biasInitTensor = rewriter.create<linalg::InitTensorOp>(
loc, resultTy.getShape(), resultETy);
if (!isQuantized) {
Value conv = rewriter
.create<linalg::DepthwiseConv2DNhwcHwcmOp>(
loc, linalgConvTy, ValueRange{input, weight},
ValueRange{zeroTensor}, strideAttr, dilationAttr)
.getResult(0);
Value convReshape = rewriter.create<tosa::ReshapeOp>(loc, resultTy, conv);
Value result =
rewriter
.create<linalg::GenericOp>(
loc, resultTy, ValueRange({bias, convReshape}),
biasInitTensor, indexingMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value added = nestedBuilder.create<arith::AddFOp>(
loc, args[0], args[1]);
nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
})
.getResult(0);
rewriter.replaceOp(op, result);
} else {
auto iZpVal = rewriter.create<arith::ConstantOp>(loc, iZp);
auto kZpVal = rewriter.create<arith::ConstantOp>(loc, kZp);
Value conv =
rewriter
.create<linalg::DepthwiseConv2DNhwcHwcmQOp>(
loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal},
ValueRange{zeroTensor}, strideAttr, dilationAttr)
.getResult(0);
Value convReshape = rewriter.create<tosa::ReshapeOp>(loc, resultTy, conv);
Value result =
rewriter
.create<linalg::GenericOp>(
loc, resultTy, ValueRange({bias, convReshape}),
biasInitTensor, indexingMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value added = nestedBuilder.create<arith::AddIOp>(
loc, args[0], args[1]);
nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
})
.getResult(0);
rewriter.replaceOp(op, result);
}
return success();
}
};
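// Lowers tosa.matmul to linalg.batch_matmul, or linalg.quantized_batch_matmul
// when quantization info is present, accumulating into a zero-filled init
// tensor whose dynamic dimensions are taken from the operands.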
class MatMulConverter : public OpConversionPattern<tosa::MatMulOp> {
public:
using OpConversionPattern<tosa::MatMulOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
Location loc = op.getLoc();
auto outputTy = op.getType().cast<ShapedType>();
auto outputElementTy = outputTy.getElementType();
auto firstOperandTy = op->getOperand(0).getType().cast<ShapedType>();
auto secondOperandTy = op->getOperand(1).getType().cast<ShapedType>();
SmallVector<Value> dynDims;
dynDims.resize(op->getResult(0).getType().cast<ShapedType>().getRank());
if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(0)) {
dynDims[0] = rewriter.create<tensor::DimOp>(loc, op->getOperand(0), 0);
}
if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(1)) {
dynDims[1] = rewriter.create<tensor::DimOp>(loc, op->getOperand(0), 1);
}
if (!secondOperandTy.hasRank() || secondOperandTy.isDynamicDim(2)) {
dynDims[2] = rewriter.create<tensor::DimOp>(loc, op->getOperand(1), 2);
}
SmallVector<Value> filteredDims = filterDynamicDims(dynDims);
auto zeroAttr = rewriter.getZeroAttr(outputElementTy);
Value zero = rewriter.create<arith::ConstantOp>(loc, zeroAttr);
auto initTensor = rewriter.create<linalg::InitTensorOp>(
loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
Value zeroTensor =
rewriter.create<linalg::FillOp>(loc, zero, initTensor).getResult(0);
if (!op.quantization_info()) {
rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()},
ValueRange{zeroTensor});
return success();
}
auto quantizationInfo = op.quantization_info().getValue();
auto aZp = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(
quantizationInfo.a_zp().getValue().getSExtValue()));
auto bZp = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(
quantizationInfo.b_zp().getValue().getSExtValue()));
rewriter.replaceOpWithNewOp<linalg::QuantizedBatchMatmulOp>(
op, TypeRange{op.getType()},
ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor);
return success();
}
};
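// Lowers tosa.fully_connected to linalg.matmul (or linalg.quantized_matmul
// when quantization info is present) against the transposed weight, followed
// by a broadcasting linalg.generic that adds the bias.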
class FullyConnectedConverter
: public OpConversionPattern<tosa::FullyConnectedOp> {
public:
using OpConversionPattern<tosa::FullyConnectedOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
Location loc = op.getLoc();
auto outputTy = op.getType().cast<ShapedType>();
auto input = op.input();
auto inputTy = input.getType().cast<ShapedType>();
auto bias = op.bias();
auto weight = op.weight();
auto weightTy = weight.getType().cast<ShapedType>();
auto weightShape = weightTy.getShape();
auto outputETy = outputTy.getElementType();
SmallVector<Value> dynDims;
dynDims.resize(op->getResult(0).getType().cast<ShapedType>().getRank());
if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) {
dynDims[0] = rewriter.create<tensor::DimOp>(loc, input, 0);
}
if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) {
dynDims[1] = rewriter.create<tensor::DimOp>(loc, weight, 0);
}
SmallVector<Value> filteredDims = filterDynamicDims(dynDims);
// Create indexing maps for the matmul output and the bias broadcast.
SmallVector<AffineMap, 4> indexingMaps;
// Broadcast the bias.
indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0,
{rewriter.getAffineDimExpr(1)},
rewriter.getContext()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
auto initTensor = rewriter.create<linalg::InitTensorOp>(
loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
// When quantized, the input element type is not the same as the output type.
Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy);
Value zero = rewriter.create<arith::ConstantOp>(loc, resultZeroAttr);
Value zeroTensor =
rewriter.create<linalg::FillOp>(loc, zero, initTensor).getResult(0);
SmallVector<int64_t> permutation{1, 0};
auto permutationAttr = DenseIntElementsAttr::get(
RankedTensorType::get({2}, rewriter.getI64Type()), permutation);
Value permutationValue =
rewriter.create<arith::ConstantOp>(loc, permutationAttr);
SmallVector<int64_t> newWeightShape{weightShape[1], weightShape[0]};
Type newWeightTy =
RankedTensorType::get(newWeightShape, weightTy.getElementType());
Value transposedWeight = rewriter.create<tosa::TransposeOp>(
loc, newWeightTy, weight, permutationValue);
auto biasInitTensor =
rewriter
.create<linalg::InitTensorOp>(loc, filteredDims,
outputTy.getShape(), outputETy)
->getResults();
if (!op.quantization_info()) {
Value matmul = rewriter
.create<linalg::MatmulOp>(
loc, TypeRange{op.getType()},
ValueRange{input, transposedWeight}, zeroTensor)
->getResult(0);
Value result =
rewriter
.create<linalg::GenericOp>(
loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value added = nestedBuilder.create<arith::AddFOp>(
loc, args[0], args[1]);
nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
})
.getResult(0);
rewriter.replaceOp(op, result);
return success();
}
auto quantizationInfo = op.quantization_info().getValue();
auto inputZp = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(
quantizationInfo.input_zp().getValue().getSExtValue()));
auto weightZp = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(
quantizationInfo.weight_zp().getValue().getSExtValue()));
Value matmul =
rewriter
.create<linalg::QuantizedMatmulOp>(
loc, TypeRange{op.getType()},
ValueRange{input, transposedWeight, inputZp, weightZp},
zeroTensor)
->getResult(0);
Value result =
rewriter
.create<linalg::GenericOp>(
loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange args) {
Value added = nestedBuilder.create<arith::AddIOp>(
loc, args[0], args[1]);
nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
})
.getResult(0);
rewriter.replaceOp(op, result);
return success();
}
};
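// Handles tosa.reshape ops that only collapse dimensions, lowering them to
// linalg.tensor_collapse_shape.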
class ReshapeConverterCollapse : public OpConversionPattern<tosa::ReshapeOp> {
public:
using OpConversionPattern<tosa::ReshapeOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
ShapedType operandTy = adaptor.input1().getType().cast<ShapedType>();
ShapedType resultTy = reshape.getType().template cast<ShapedType>();
bool isDynamic = !operandTy.hasStaticShape();
if (isDynamic && resultTy.getRank() != 1) {
return rewriter.notifyMatchFailure(
reshape, "Cannot collapse dynamic dims to more than one dimension");
}
if (operandTy == resultTy) {
rewriter.replaceOp(reshape, adaptor.getOperands()[0]);
return success();
}
SmallVector<ReassociationExprs, 4> reassociationMap;
if (!createReassociationMapsForCollapse(rewriter, operandTy.getShape(),
resultTy.getShape(),
reassociationMap, isDynamic)) {
return rewriter.notifyMatchFailure(
reshape,
"tosa.reshape Attempting to collapse into an incompatible shape");
}
SmallVector<int64_t> intermediateShape;
if (!findIntermediateShape(operandTy.getShape(), resultTy.getShape(),
intermediateShape, isDynamic)) {
return rewriter.notifyMatchFailure(
reshape, "tosa.reshape Cannot collapse into given shape");
}
rewriter.replaceOpWithNewOp<linalg::TensorCollapseShapeOp>(
reshape, resultTy, adaptor.getOperands()[0], reassociationMap);
return success();
}
};
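// Handles tosa.reshape ops that only expand dimensions, lowering them to
// linalg.tensor_expand_shape.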
class ReshapeConverterExpand : public OpConversionPattern<tosa::ReshapeOp> {
public:
using OpConversionPattern<tosa::ReshapeOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
ShapedType operandTy = adaptor.input1().getType().cast<ShapedType>();
ShapedType resultTy = reshape.getType().template cast<ShapedType>();
bool isDynamic = !operandTy.hasStaticShape();
if (operandTy == resultTy) {
rewriter.replaceOp(reshape, adaptor.getOperands()[0]);
return success();
}
if (isDynamic && operandTy.getRank() != 1) {
return rewriter.notifyMatchFailure(
reshape, "Cannot expand dynamic dims from more than one dimension");
}
SmallVector<ReassociationExprs, 4> reassociationMap;
if (!createReassociationMapsForCollapse(rewriter, resultTy.getShape(),
operandTy.getShape(),
reassociationMap, isDynamic)) {
return rewriter.notifyMatchFailure(
reshape,
"tosa.reshape Attempting to expand into an incompatible shape");
}
SmallVector<int64_t> intermediateShape;
if (!findIntermediateShape(operandTy.getShape(), resultTy.getShape(),
intermediateShape, isDynamic) ||
intermediateShape != operandTy.getShape()) {
return rewriter.notifyMatchFailure(
reshape, "tosa.reshape Cannot expand into given shape");
}
rewriter.replaceOpWithNewOp<linalg::TensorExpandShapeOp>(
reshape, resultTy, adaptor.getOperands()[0], reassociationMap);
return success();
}
};
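// Handles tosa.reshape ops that both collapse and expand dimensions by
// splitting them into a pure collapse followed by a pure expand through the
// intermediate shape; the two patterns above then lower each half.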
class ReshapeConverterCollapseExpand
: public OpConversionPattern<tosa::ReshapeOp> {
public:
using OpConversionPattern<tosa::ReshapeOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
ShapedType operandTy = adaptor.input1().getType().cast<ShapedType>();
ShapedType resultTy = reshape.getType().template cast<ShapedType>();
bool isDynamic = !operandTy.hasStaticShape();
if (operandTy == resultTy) {
rewriter.replaceOp(reshape, adaptor.getOperands()[0]);
return success();
}
SmallVector<int64_t> intermediateShape;
if (!findIntermediateShape(resultTy.getShape(), operandTy.getShape(),
intermediateShape, isDynamic)) {
return rewriter.notifyMatchFailure(
reshape, "tosa.reshape Cannot identify an intermediate shape between "
"the given two shapes");
}
Value collapse = rewriter.create<tosa::ReshapeOp>(
reshape.getLoc(),
RankedTensorType::get(intermediateShape,
reshape.getType().getElementType()),
adaptor.input1());
Value expand =
rewriter.create<tosa::ReshapeOp>(reshape.getLoc(), resultTy, collapse);
rewriter.replaceOp(reshape, expand);
return success();
}
};
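// Lowers tosa.transpose with a constant permutation to a linalg.generic whose
// input indexing map applies the permutation; the body simply yields the
// input element.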
class TransposeConverter : public OpRewritePattern<tosa::TransposeOp> {
public:
using OpRewritePattern<tosa::TransposeOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::TransposeOp op,
PatternRewriter &rewriter) const final {
DenseIntElementsAttr perms;
if (!matchPattern(op.perms(), m_Constant(&perms))) {
return failure();
}
auto loc = op.getLoc();
auto input = op->getOperand(0);
auto resultTy = op.getType().cast<ShapedType>();
SmallVector<Value> dynDims;
dynDims.resize(op->getResult(0).getType().cast<ShapedType>().getRank());
SmallVector<AffineExpr, 2> inputExprs;
inputExprs.resize(resultTy.getRank());
auto operandTy = input.getType().cast<ShapedType>();
for (auto permutation : llvm::enumerate(perms.getValues<APInt>())) {
auto index = permutation.index();
auto value = permutation.value().getZExtValue();
if (!operandTy.hasRank() || operandTy.isDynamicDim(index)) {
dynDims[value] = rewriter.create<tensor::DimOp>(loc, input, index);
}
inputExprs[value] = rewriter.getAffineDimExpr(index);
}
SmallVector<Value> filteredDims = filterDynamicDims(dynDims);
auto initTensor = rewriter.create<linalg::InitTensorOp>(
loc, filteredDims, resultTy.getShape(), resultTy.getElementType());
SmallVector<AffineMap, 2> affineMaps = {
AffineMap::get(resultTy.getRank(), /*symbolCount=*/0, inputExprs,
rewriter.getContext()),
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
rewriter.replaceOpWithNewOp<linalg::GenericOp>(
op, resultTy, op.input1(), ValueRange{initTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
nestedBuilder.create<linalg::YieldOp>(loc, *args.begin());
});
return success();
}
};
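// Lowers tosa.rescale to a linalg.generic that, per element, subtracts the
// input zero point, applies the (possibly per-channel) multiplier and shift
// via tosa.apply_scale, adds the output zero point, and clamps to the range
// of the output type.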
class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
public:
using OpRewritePattern<tosa::RescaleOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::RescaleOp op,
PatternRewriter &rewriter) const final {
auto loc = op.getLoc();
auto input = op.input();
auto inputTy = op.input().getType().cast<ShapedType>();
auto outputTy = op.output().getType().cast<ShapedType>();
unsigned rank = inputTy.getRank();
// This is an illegal configuration; terminate and log an error.
if (op.double_round() && !op.scale32())
return rewriter.notifyMatchFailure(
op, "tosa.rescale requires scale32 for double_round to be true");
if (!outputTy.hasStaticShape())
return rewriter.notifyMatchFailure(
op, "tosa to linalg conversion expects statically shaped tensors");
// The shift and multiplier values.
SmallVector<int32_t> multiplierValues;
getValuesFromIntArrayAttribute(op.multiplier(), multiplierValues);
SmallVector<int8_t> shiftValues;
getValuesFromIntArrayAttribute(op.shift(), shiftValues);
// Double rounding only occurs if the shift is greater than 31; check whether
// this is ever the case.
bool doubleRound =
op.double_round() &&
llvm::any_of(shiftValues, [](int32_t v) { return v > 31; });
SmallVector<AffineMap> indexingMaps = {
rewriter.getMultiDimIdentityMap(rank)};
SmallVector<Value, 4> genericInputs = {input};
// If we are rescaling per-channel then we need to store the multiplier
// values in a buffer.
Value multiplierConstant;
int64_t multiplierArg = 0;
if (multiplierValues.size() == 1) {
multiplierConstant = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(multiplierValues.front()));
} else {
SmallVector<AffineExpr, 2> multiplierExprs{
rewriter.getAffineDimExpr(rank - 1)};
auto multiplierType =
RankedTensorType::get({static_cast<int64_t>(multiplierValues.size())},
rewriter.getI32Type());
genericInputs.push_back(rewriter.create<arith::ConstantOp>(
loc, DenseIntElementsAttr::get(multiplierType, multiplierValues)));
indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank,
/*symbolCount=*/0, multiplierExprs,
rewriter.getContext()));
multiplierArg = indexingMaps.size() - 1;
}
// If we are rescaling per-channel then we need to store the shift
// values in a buffer.
Value shiftConstant;
int64_t shiftArg = 0;
if (shiftValues.size() == 1) {
shiftConstant = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI8IntegerAttr(shiftValues.front()));
} else {
SmallVector<AffineExpr, 2> shiftExprs = {
rewriter.getAffineDimExpr(rank - 1)};
auto shiftType =
RankedTensorType::get({static_cast<int64_t>(shiftValues.size())},
rewriter.getIntegerType(8));
genericInputs.push_back(rewriter.create<arith::ConstantOp>(
loc, DenseIntElementsAttr::get(shiftType, shiftValues)));
indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank,
/*symbolCount=*/0, shiftExprs,
rewriter.getContext()));
shiftArg = indexingMaps.size() - 1;
}
// Indexing maps for output values.
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank));
// Construct the output init tensor for the linalg.generic op.
Value initTensor = rewriter.create<linalg::InitTensorOp>(
loc, ArrayRef<Value>({}), outputTy.getShape(),
outputTy.getElementType());
auto linalgOp = rewriter.create<linalg::GenericOp>(
loc, outputTy, genericInputs, ValueRange{initTensor}, indexingMaps,
getNParallelLoopsAttrs(rank),
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange blockArgs) {
Value value = blockArgs[0];
Type valueTy = value.getType();
// For now we do all of our math in 64-bit. This is not optimal, but it
// should be correct for now; consider computing the correct bit depth
// later.
int32_t inBitwidth = valueTy.getIntOrFloatBitWidth() > 32 ? 48 : 32;
auto inputZp = createConstFromIntAttribute<int32_t>(
op, "input_zp", nestedBuilder.getIntegerType(inBitwidth),
nestedBuilder);
auto outputZp = createConstFromIntAttribute<int32_t>(
op, "output_zp", nestedBuilder.getI32Type(), nestedBuilder);
Value multiplier = multiplierConstant ? multiplierConstant
: blockArgs[multiplierArg];
Value shift = shiftConstant ? shiftConstant : blockArgs[shiftArg];
if (valueTy.getIntOrFloatBitWidth() < 32) {
if (valueTy.isUnsignedInteger()) {
value = nestedBuilder
.create<UnrealizedConversionCastOp>(
nestedLoc,
nestedBuilder.getIntegerType(
valueTy.getIntOrFloatBitWidth()),
value)
.getResult(0);
value = nestedBuilder.create<arith::ExtUIOp>(
nestedLoc, nestedBuilder.getI32Type(), value);
} else {
value = nestedBuilder.create<arith::ExtSIOp>(
nestedLoc, nestedBuilder.getI32Type(), value);
}
}
value =
nestedBuilder.create<arith::SubIOp>(nestedLoc, value, inputZp);
value = nestedBuilder.create<tosa::ApplyScaleOp>(
loc, nestedBuilder.getI32Type(), value, multiplier, shift,
nestedBuilder.getBoolAttr(doubleRound));
// Move to the new zero-point.
value =
nestedBuilder.create<arith::AddIOp>(nestedLoc, value, outputZp);
// Saturate to the output size.
IntegerType outIntType =
blockArgs.back().getType().cast<IntegerType>();
unsigned outBitWidth = outIntType.getWidth();
int32_t intMin = APInt::getSignedMinValue(outBitWidth).getSExtValue();
int32_t intMax = APInt::getSignedMaxValue(outBitWidth).getSExtValue();
// Unsigned integers have a different output value.
if (outIntType.isUnsignedInteger()) {
intMin = 0;
intMax = APInt::getMaxValue(outBitWidth).getZExtValue();
}
auto intMinVal = nestedBuilder.create<arith::ConstantOp>(
loc, nestedBuilder.getI32IntegerAttr(intMin));
auto intMaxVal = nestedBuilder.create<arith::ConstantOp>(
loc, nestedBuilder.getI32IntegerAttr(intMax));
value = clampHelper<arith::CmpIOp>(
nestedLoc, value, intMinVal, intMaxVal, arith::CmpIPredicate::slt,
nestedBuilder);
if (outIntType.getWidth() < 32) {
value = nestedBuilder.create<arith::TruncIOp>(
nestedLoc, rewriter.getIntegerType(outIntType.getWidth()),
value);
if (outIntType.isUnsignedInteger()) {
value = nestedBuilder
.create<UnrealizedConversionCastOp>(nestedLoc,
outIntType, value)
.getResult(0);
}
}
nestedBuilder.create<linalg::YieldOp>(loc, value);
});
rewriter.replaceOp(op, linalgOp->getResults());
return success();
}
};
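// Lowers tosa.resize to a linalg.generic whose body computes the source
// coordinates from the output index using the stride/offset attributes and
// then performs either nearest-neighbor selection or bilinear interpolation
// via tensor.extract.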
class ResizeConverter : public OpRewritePattern<tosa::ResizeOp> {
public:
using OpRewritePattern<tosa::ResizeOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::ResizeOp op,
PatternRewriter &rewriter) const final {
Location loc = op.getLoc();
auto input = op.input();
auto inputTy = input.getType().cast<ShapedType>();
auto resultTy = op.getType().cast<ShapedType>();
auto resultElementTy = resultTy.getElementType();
auto imageH = inputTy.getShape()[1];
auto imageW = inputTy.getShape()[2];
if (!resultTy.hasStaticShape())
return failure();
if (op.mode() != "NEAREST_NEIGHBOR" && op.mode() != "BILINEAR")
return failure();
auto initTensor =
rewriter
.create<linalg::InitTensorOp>(loc, ArrayRef<Value>{},
resultTy.getShape(), resultElementTy)
.result();
SmallVector<AffineMap, 2> affineMaps = {
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, resultTy, ValueRange({}), ValueRange{initTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()));
rewriter.replaceOp(op, genericOp.getResult(0));
{
OpBuilder::InsertionGuard regionGuard(rewriter);
rewriter.createBlock(&genericOp.region(), genericOp.region().end(),
TypeRange({resultElementTy}));
Value batch = rewriter.create<linalg::IndexOp>(loc, 0);
Value y = rewriter.create<linalg::IndexOp>(loc, 1);
Value x = rewriter.create<linalg::IndexOp>(loc, 2);
Value channel = rewriter.create<linalg::IndexOp>(loc, 3);
auto hwMin = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(0));
auto hMax = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(imageH - 1));
auto wMax = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(imageW - 1));
Value inY =
rewriter.create<arith::IndexCastOp>(loc, rewriter.getI32Type(), y);
Value inX =
rewriter.create<arith::IndexCastOp>(loc, rewriter.getI32Type(), x);
int32_t shift = op.shift();
bool floatingPointMode = shift == 0;
Value yStride, xStride, yOffset, xOffset;
if (floatingPointMode) {
yStride = rewriter.create<arith::ConstantOp>(loc, op.stride_fp()[0]);
xStride = rewriter.create<arith::ConstantOp>(loc, op.stride_fp()[1]);
yOffset = rewriter.create<arith::ConstantOp>(loc, op.offset_fp()[0]);
xOffset = rewriter.create<arith::ConstantOp>(loc, op.offset_fp()[1]);
} else {
SmallVector<int32_t> stride, offset;
getValuesFromIntArrayAttribute(op.stride(), stride);
getValuesFromIntArrayAttribute(op.offset(), offset);
yStride = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(stride[0]));
xStride = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(stride[1]));
yOffset = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(offset[0]));
xOffset = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(offset[1]));
}
// Compute the integer index and partial offset.
// x = x * stride + offset;
// ix = floor(x)
// dx = x - ix
Value ix, iy, dx, dy;
if (floatingPointMode) {
Value y =
rewriter.create<arith::UIToFPOp>(loc, rewriter.getF32Type(), inY);
Value x =
rewriter.create<arith::UIToFPOp>(loc, rewriter.getF32Type(), inX);
y = rewriter.create<arith::MulFOp>(loc, y, yStride);
x = rewriter.create<arith::MulFOp>(loc, x, xStride);
y = rewriter.create<arith::AddFOp>(loc, y, yOffset);
x = rewriter.create<arith::AddFOp>(loc, x, xOffset);
iy = rewriter.create<math::FloorOp>(loc, y);
ix = rewriter.create<math::FloorOp>(loc, x);
dy = rewriter.create<arith::SubFOp>(loc, y, iy);
dx = rewriter.create<arith::SubFOp>(loc, x, ix);
iy = rewriter.create<arith::FPToSIOp>(loc, rewriter.getI32Type(), iy);
ix = rewriter.create<arith::FPToSIOp>(loc, rewriter.getI32Type(), ix);
} else {
Value shiftVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(shift));
Value y = rewriter.create<arith::MulIOp>(loc, inY, yStride);
Value x = rewriter.create<arith::MulIOp>(loc, inX, xStride);
y = rewriter.create<arith::AddIOp>(loc, y, yOffset);
x = rewriter.create<arith::AddIOp>(loc, x, xOffset);
iy = rewriter.create<arith::ShRSIOp>(loc, y, shiftVal);
ix = rewriter.create<arith::ShRSIOp>(loc, x, shiftVal);
Value yTrunc = rewriter.create<arith::ShLIOp>(loc, iy, shiftVal);
Value xTrunc = rewriter.create<arith::ShLIOp>(loc, ix, shiftVal);
dy = rewriter.create<arith::SubIOp>(loc, y, yTrunc);
dx = rewriter.create<arith::SubIOp>(loc, x, xTrunc);
}
if (op.mode() == "NEAREST_NEIGHBOR") {
Value yPred, xPred;
// Round the index position towards the closest pixel location.
if (floatingPointMode) {
auto halfVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getF32FloatAttr(0.5f));
yPred = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE,
dy, halfVal);
xPred = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE,
dx, halfVal);
} else {
auto halfVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(1 << (shift - 1)));
yPred = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sge,
dy, halfVal);
xPred = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sge,
dx, halfVal);
}
auto zeroVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(0));
auto oneVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(1));
auto yOffset =
rewriter.create<mlir::SelectOp>(loc, yPred, oneVal, zeroVal);
auto xOffset =
rewriter.create<mlir::SelectOp>(loc, xPred, oneVal, zeroVal);
iy = rewriter.create<arith::AddIOp>(loc, iy, yOffset);
ix = rewriter.create<arith::AddIOp>(loc, ix, xOffset);
// Clamp to be within the bounds of the input image.
iy = clampHelper<arith::CmpIOp>(loc, iy, hwMin, hMax,
arith::CmpIPredicate::slt, rewriter);
ix = clampHelper<arith::CmpIOp>(loc, ix, hwMin, wMax,
arith::CmpIPredicate::slt, rewriter);
// Read the value from the input array.
iy = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
iy);
ix = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
ix);
Value result = rewriter.create<tensor::ExtractOp>(
loc, input, ValueRange{batch, iy, ix, channel});
rewriter.create<linalg::YieldOp>(loc, result);
return success();
}
if (op.mode() == "BILINEAR") {
Value y0 = iy;
Value x0 = ix;
auto oneVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(1));
Value y1 = rewriter.create<arith::AddIOp>(loc, y0, oneVal);
Value x1 = rewriter.create<arith::AddIOp>(loc, x0, oneVal);
y0 = clampHelper<arith::CmpIOp>(loc, y0, hwMin, hMax,
arith::CmpIPredicate::slt, rewriter);
y1 = clampHelper<arith::CmpIOp>(loc, y1, hwMin, hMax,
arith::CmpIPredicate::slt, rewriter);
x0 = clampHelper<arith::CmpIOp>(loc, x0, hwMin, wMax,
arith::CmpIPredicate::slt, rewriter);
x1 = clampHelper<arith::CmpIOp>(loc, x1, hwMin, wMax,
arith::CmpIPredicate::slt, rewriter);
y0 = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
y0);
y1 = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
y1);
x0 = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
x0);
x1 = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
x1);
Value y0x0 = rewriter.create<tensor::ExtractOp>(
loc, input, ValueRange{batch, y0, x0, channel});
Value y0x1 = rewriter.create<tensor::ExtractOp>(
loc, input, ValueRange{batch, y0, x1, channel});
Value y1x0 = rewriter.create<tensor::ExtractOp>(
loc, input, ValueRange{batch, y1, x0, channel});
Value y1x1 = rewriter.create<tensor::ExtractOp>(
loc, input, ValueRange{batch, y1, x1, channel});
if (floatingPointMode) {
auto oneVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getF32FloatAttr(1.f));
Value rightPart = dx;
Value leftPart = rewriter.create<arith::SubFOp>(loc, oneVal, dx);
y0x0 = rewriter.create<arith::MulFOp>(loc, y0x0, leftPart);
y0x1 = rewriter.create<arith::MulFOp>(loc, y0x1, rightPart);
Value topAcc = rewriter.create<arith::AddFOp>(loc, y0x0, y0x1);
y1x0 = rewriter.create<arith::MulFOp>(loc, y1x0, leftPart);
y1x1 = rewriter.create<arith::MulFOp>(loc, y1x1, rightPart);
Value bottomAcc = rewriter.create<arith::AddFOp>(loc, y1x0, y1x1);
Value bottomPart = dy;
Value topPart = rewriter.create<arith::SubFOp>(loc, oneVal, dy);
topAcc = rewriter.create<arith::MulFOp>(loc, topAcc, topPart);
bottomAcc =
rewriter.create<arith::MulFOp>(loc, bottomAcc, bottomPart);
Value result = rewriter.create<arith::AddFOp>(loc, topAcc, bottomAcc);
rewriter.create<linalg::YieldOp>(loc, result);
return success();
} else {
y0x0 = rewriter.create<arith::ExtSIOp>(loc, resultElementTy, y0x0);
y0x1 = rewriter.create<arith::ExtSIOp>(loc, resultElementTy, y0x1);
y1x0 = rewriter.create<arith::ExtSIOp>(loc, resultElementTy, y1x0);
y1x1 = rewriter.create<arith::ExtSIOp>(loc, resultElementTy, y1x1);
if (resultElementTy.getIntOrFloatBitWidth() > 32) {
dx = rewriter.create<arith::ExtSIOp>(loc, resultElementTy, dx);
dy = rewriter.create<arith::ExtSIOp>(loc, resultElementTy, dy);
}
auto unitVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getIntegerAttr(resultElementTy, 1 << shift));
Value rightPart = dx;
Value leftPart = rewriter.create<arith::SubIOp>(loc, unitVal, dx);
y0x0 = rewriter.create<arith::MulIOp>(loc, y0x0, leftPart);
y0x1 = rewriter.create<arith::MulIOp>(loc, y0x1, rightPart);
Value topAcc = rewriter.create<arith::AddIOp>(loc, y0x0, y0x1);
y1x0 = rewriter.create<arith::MulIOp>(loc, y1x0, leftPart);
y1x1 = rewriter.create<arith::MulIOp>(loc, y1x1, rightPart);
Value bottomAcc = rewriter.create<arith::AddIOp>(loc, y1x0, y1x1);
Value bottomPart = dy;
Value topPart = rewriter.create<arith::SubIOp>(loc, unitVal, dy);
topAcc = rewriter.create<arith::MulIOp>(loc, topAcc, topPart);
bottomAcc =
rewriter.create<arith::MulIOp>(loc, bottomAcc, bottomPart);
Value result = rewriter.create<arith::AddIOp>(loc, topAcc, bottomAcc);
rewriter.create<linalg::YieldOp>(loc, result);
return success();
}
}
return failure();
}
return success();
}
};
// At the codegen level any identity operations should be removed. Any cases
// where identity is load-bearing (e.g. cross device computation) should be
// handled before lowering to codegen.
template <typename SrcOp>
class IdentityNConverter : public OpRewritePattern<SrcOp> {
public:
using OpRewritePattern<SrcOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SrcOp op,
PatternRewriter &rewriter) const final {
rewriter.replaceOp(op, op.getOperation()->getOperands());
return success();
}
};
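// Converts the tosa reduction ops by delegating to
// reduceMatchAndRewriteHelper with the op's reduction axis.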
template <typename SrcOp>
class ReduceConverter : public OpRewritePattern<SrcOp> {
public:
using OpRewritePattern<SrcOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SrcOp reduceOp,
PatternRewriter &rewriter) const final {
return reduceMatchAndRewriteHelper(reduceOp, reduceOp.axis(), rewriter);
}
};
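// Lowers tosa.concat by filling an init tensor of the result shape with zero
// and then inserting each operand as a slice at increasing offsets along the
// concatenation axis.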
struct ConcatConverter : public OpConversionPattern<tosa::ConcatOp> {
using OpConversionPattern<tosa::ConcatOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::ConcatOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto resultType = op.getType().dyn_cast<RankedTensorType>();
if (!resultType || !resultType.hasStaticShape()) {
return rewriter.notifyMatchFailure(op,
"expected static shaped tensor type");
}
Location loc = op.getLoc();
int axis = op.axis();
Value axisValue = rewriter.createOrFold<arith::ConstantOp>(
loc, rewriter.getIndexAttr(axis));
int rank = resultType.getRank();
SmallVector<Value, 3> offsets, sizes, strides;
sizes.reserve(rank);
strides.resize(rank, rewriter.create<arith::ConstantIndexOp>(loc, 1));
offsets.resize(rank, rewriter.create<arith::ConstantIndexOp>(loc, 0));
for (int i = 0; i < rank; ++i) {
sizes.push_back(rewriter.createOrFold<tensor::DimOp>(
loc, adaptor.getOperands()[0], i));
}
Value resultDimSize = sizes[axis];
for (auto arg : adaptor.getOperands().drop_front()) {
auto size = rewriter.createOrFold<tensor::DimOp>(loc, arg, axisValue);
resultDimSize =
rewriter.createOrFold<arith::AddIOp>(loc, resultDimSize, size);
}
sizes[axis] = resultDimSize;
Value init = rewriter.create<linalg::InitTensorOp>(
loc, resultType.getShape(), resultType.getElementType());
Value zeroVal = rewriter.createOrFold<arith::ConstantOp>(
loc, rewriter.getZeroAttr(resultType.getElementType()));
Value result =
rewriter.create<linalg::FillOp>(loc, zeroVal, init).getResult(0);
auto toOpFoldResult = [](Value v) -> OpFoldResult {
auto op = v.getDefiningOp<arith::ConstantIndexOp>();
if (!op)
return v;
return op.getValue();
};
for (auto arg : adaptor.getOperands()) {
sizes[axis] = rewriter.createOrFold<tensor::DimOp>(loc, arg, axisValue);
result = rewriter.createOrFold<tensor::InsertSliceOp>(
loc, arg, result,
llvm::to_vector(llvm::map_range(offsets, toOpFoldResult)),
llvm::to_vector(llvm::map_range(sizes, toOpFoldResult)),
llvm::to_vector(llvm::map_range(strides, toOpFoldResult)));
offsets[axis] =
rewriter.createOrFold<arith::AddIOp>(loc, offsets[axis], sizes[axis]);
}
rewriter.replaceOp(op, result);
return success();
}
};
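// Lowers tosa.reverse to a linalg.generic that extracts from the input with
// the index along the reversed axis mirrored (size - 1 - index).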
class ReverseConverter : public OpRewritePattern<tosa::ReverseOp> {
public:
using OpRewritePattern<tosa::ReverseOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::ReverseOp op,
PatternRewriter &rewriter) const final {
auto loc = op.getLoc();
Value input = op.input();
auto inputTy = input.getType().template cast<ShapedType>();
auto resultTy = op.getType().template cast<ShapedType>();
auto axis = op.axis();
SmallVector<Value> dynDims;
for (int i = 0; i < inputTy.getRank(); i++) {
if (inputTy.isDynamicDim(i)) {
dynDims.push_back(rewriter.create<tensor::DimOp>(loc, input, i));
}
}
Value axisDimSize = rewriter.create<tensor::DimOp>(loc, input, axis);
// First fill the output buffer with the init value.
auto initTensor = rewriter
.create<linalg::InitTensorOp>(
loc, ArrayRef<Value>({dynDims}),
inputTy.getShape(), inputTy.getElementType())
.result();
SmallVector<AffineMap, 2> affineMaps = {
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
rewriter.replaceOpWithNewOp<linalg::GenericOp>(
op, resultTy, ArrayRef<Value>({}), ValueRange{initTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
llvm::SmallVector<Value> indices;
for (unsigned int i = 0; i < inputTy.getRank(); i++) {
auto index =
rewriter.create<linalg::IndexOp>(nestedLoc, i).getResult();
if (i == axis) {
auto one = rewriter.create<arith::ConstantIndexOp>(nestedLoc, 1);
auto sizeMinusOne =
rewriter.create<arith::SubIOp>(nestedLoc, axisDimSize, one);
index = rewriter.create<arith::SubIOp>(nestedLoc, sizeMinusOne,
index);
}
indices.push_back(index);
}
auto extract = nestedBuilder.create<tensor::ExtractOp>(
nestedLoc, input, indices);
nestedBuilder.create<linalg::YieldOp>(op.getLoc(),
extract.getResult());
});
return success();
}
};
// This converter translates a tile operation to a reshape, broadcast, and
// reshape. The first reshape minimally expands each tiled dimension to
// include a preceding size-1 dim. This dim is then broadcast to the
// appropriate multiple.
struct TileConverter : public OpConversionPattern<tosa::TileOp> {
using OpConversionPattern<tosa::TileOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::TileOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = op.getLoc();
auto input = op.input1();
auto inputTy = input.getType().cast<ShapedType>();
auto inputShape = inputTy.getShape();
auto resultTy = op.getType().cast<ShapedType>();
auto elementTy = inputTy.getElementType();
int64_t rank = inputTy.getRank();
if (!inputTy.hasStaticShape() || !resultTy.hasStaticShape())
return failure();
SmallVector<int64_t> multiples;
getValuesFromIntArrayAttribute(op.multiples(), multiples);
// Broadcast the newly added dimensions to their appropriate multiple.
SmallVector<int64_t, 2> genericShape;
for (int i = 0; i < rank; i++) {
genericShape.push_back(multiples[i]);
genericShape.push_back(inputShape[i]);
}
auto initTensor = rewriter.create<linalg::InitTensorOp>(
op.getLoc(), ArrayRef<Value>({}), genericShape, elementTy);
// We need to map the input shape to the non-broadcasted dimensions.
SmallVector<AffineExpr, 4> dimExprs;
dimExprs.reserve(rank);
for (unsigned i = 0; i < rank; ++i)
dimExprs.push_back(rewriter.getAffineDimExpr(i * 2 + 1));
auto readAffineMap =
AffineMap::get(/*dimCount=*/rank * 2, /*symbolCount=*/0, dimExprs,
rewriter.getContext());
SmallVector<AffineMap, 2> affineMaps = {
readAffineMap, rewriter.getMultiDimIdentityMap(genericShape.size())};
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, RankedTensorType::get(genericShape, elementTy), input,
ValueRange{initTensor}, affineMaps,
getNParallelLoopsAttrs(genericShape.size()),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
nestedBuilder.create<linalg::YieldOp>(op.getLoc(), *args.begin());
});
rewriter.replaceOpWithNewOp<tosa::ReshapeOp>(
op, resultTy, genericOp.getResult(0),
rewriter.getI64ArrayAttr(resultTy.getShape()));
return success();
}
};
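// Lowers tosa.pad to linalg.pad_tensor, reading the low/high pad amounts from
// the padding tensor and using either the provided pad_const, the
// quantization zero point, or zero as the padding value.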
class PadConverter : public OpRewritePattern<tosa::PadOp> {
public:
using OpRewritePattern<tosa::PadOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::PadOp padOp,
PatternRewriter &rewriter) const final {
auto loc = padOp.getLoc();
auto input = padOp.input1();
auto padding = padOp.padding();
ShapedType inputTy = input.getType().cast<ShapedType>();
ShapedType paddingTy = padding.getType().cast<ShapedType>();
Type elementTy = inputTy.getElementType();
int64_t rank = inputTy.getRank();
if (!inputTy.hasStaticShape() || !paddingTy.hasStaticShape()) {
return rewriter.notifyMatchFailure(
padOp,
"Pad converter requires static shaped input / padding values.");
}
// Set up the default constantAttr.
Value padConstant;
if (padOp.pad_const()) {
padConstant = rewriter.createOrFold<tensor::ExtractOp>(
loc, padOp.pad_const(), ValueRange({}));
} else {
Attribute constantAttr;
if (elementTy.isa<FloatType>())
constantAttr = rewriter.getFloatAttr(elementTy, 0.0);
else if (elementTy.isa<IntegerType>() && !padOp.quantization_info())
constantAttr = rewriter.getIntegerAttr(elementTy, 0);
else if (elementTy.isa<IntegerType>() && padOp.quantization_info()) {
auto value = padOp.quantization_info().getValue().input_zp().getValue();
constantAttr = rewriter.getIntegerAttr(elementTy, value.getZExtValue());
}
if (constantAttr)
padConstant = rewriter.create<arith::ConstantOp>(loc, constantAttr);
}
if (!padConstant) {
return rewriter.notifyMatchFailure(
padOp, "tosa.pad was unable to determine the pad constant value.");
}
Value lowIndex =
rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(0));
Value highIndex =
rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(1));
SmallVector<OpFoldResult, 3> lowValues;
SmallVector<OpFoldResult, 3> highValues;
lowValues.reserve(rank);
highValues.reserve(rank);
for (int i = 0; i < rank; i++) {
Value inputIndex = rewriter.createOrFold<arith::ConstantIndexOp>(loc, i);
Value lowVal = rewriter.createOrFold<tensor::ExtractOp>(
loc, padding, ValueRange({inputIndex, lowIndex}));
Value highVal = rewriter.createOrFold<tensor::ExtractOp>(
loc, padding, ValueRange({inputIndex, highIndex}));
lowVal = rewriter.createOrFold<arith::IndexCastOp>(
loc, rewriter.getIndexType(), lowVal);
highVal = rewriter.createOrFold<arith::IndexCastOp>(
loc, rewriter.getIndexType(), highVal);
lowValues.push_back(lowVal);
highValues.push_back(highVal);
}
auto newPadOp = linalg::PadTensorOp::createPadScalarOp(
padOp.getType(), input, padConstant, lowValues, highValues,
/*nofold=*/false, loc, rewriter);
rewriter.replaceOp(padOp, newPadOp.getResult());
return success();
}
};
// Tosa argmax lowering represents the ArgMax op as a linalg.indexed_generic
// op, producing two output buffers.
//
// The first output buffer contains the index of the found maximum value. It is
// initialized to 0 and has the resulting integer type.
//
// The second output buffer contains the maximum value found. It is initialized
// to the minimum representable value of the input element type. After being
// populated by indexed_generic, this buffer is discarded as only the index is
// requested.
//
// The indexed_generic op updates both the maximum value and index if the
// current value exceeds the running max.
class ArgMaxConverter : public OpRewritePattern<tosa::ArgMaxOp> {
public:
using OpRewritePattern<tosa::ArgMaxOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::ArgMaxOp argmaxOp,
PatternRewriter &rewriter) const final {
auto loc = argmaxOp.getLoc();
Value input = argmaxOp.input();
auto inputTy = input.getType().cast<ShapedType>();
auto resultTy = argmaxOp.output().getType().cast<ShapedType>();
auto inElementTy = inputTy.getElementType();
auto outElementTy = resultTy.getElementType();
int axis = argmaxOp.axis();
auto resultMaxTy = RankedTensorType::get(resultTy.getShape(), inElementTy);
if (!inputTy.hasStaticShape())
return rewriter.notifyMatchFailure(
argmaxOp,
"tosa.arg_max to linalg.* requires statically shaped input");
if (!outElementTy.isa<IntegerType>())
return rewriter.notifyMatchFailure(
argmaxOp,
"tosa.arg_max to linalg.* requires integer-like result type");
// First fill the output buffer for the index.
auto initTensorIdx =
rewriter
.create<linalg::InitTensorOp>(loc, ArrayRef<Value>({}),
resultTy.getShape(), outElementTy)
.result();
auto fillValueIdx = rewriter.create<arith::ConstantOp>(
loc, rewriter.getIntegerAttr(outElementTy, 0));
auto filledTensorIdx =
rewriter.create<linalg::FillOp>(loc, fillValueIdx, initTensorIdx)
.result();
// Second fill the output buffer for the running max.
auto initTensorMax =
rewriter
.create<linalg::InitTensorOp>(loc, ArrayRef<Value>({}),
resultTy.getShape(), inElementTy)
.result();
auto fillValueMaxAttr =
createInitialValueForReduceOp(argmaxOp, inElementTy, rewriter);
if (!fillValueMaxAttr)
return rewriter.notifyMatchFailure(
argmaxOp, "unsupported tosa.argmax element type");
auto fillValueMax =
rewriter.create<arith::ConstantOp>(loc, fillValueMaxAttr);
auto filledTensorMax =
rewriter.create<linalg::FillOp>(loc, fillValueMax, initTensorMax)
.result();
// We need to reduce along the arg-max axis, with parallel operations along
// the rest.
SmallVector<StringRef, 4> iteratorTypes;
iteratorTypes.resize(inputTy.getRank(), getParallelIteratorTypeName());
iteratorTypes[axis] = getReductionIteratorTypeName();
SmallVector<AffineExpr, 2> srcExprs;
SmallVector<AffineExpr, 2> dstExprs;
for (int i = 0, rank = inputTy.getRank(); i != rank; ++i) {
srcExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext()));
if (axis != i)
dstExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext()));
}
bool didEncounterError = false;
auto maps = AffineMap::inferFromExprList({srcExprs, dstExprs, dstExprs});
auto linalgOp = rewriter.create<linalg::GenericOp>(
loc, ArrayRef<Type>({resultTy, resultMaxTy}), input,
ValueRange({filledTensorIdx, filledTensorMax}), maps, iteratorTypes,
[&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange blockArgs) {
auto newValue = blockArgs[0];
auto oldIndex = blockArgs[1];
auto oldValue = blockArgs[2];
Value newIndex = rewriter.create<arith::IndexCastOp>(
nestedLoc, oldIndex.getType(),
rewriter.create<linalg::IndexOp>(loc, axis));
Value predicate;
if (inElementTy.isa<FloatType>()) {
predicate = rewriter.create<arith::CmpFOp>(
nestedLoc, arith::CmpFPredicate::OGT, newValue, oldValue);
} else if (inElementTy.isa<IntegerType>()) {
predicate = rewriter.create<arith::CmpIOp>(
nestedLoc, arith::CmpIPredicate::sgt, newValue, oldValue);
} else {
didEncounterError = true;
return;
}
auto resultMax = rewriter.create<mlir::SelectOp>(nestedLoc, predicate,
newValue, oldValue);
auto resultIndex = rewriter.create<mlir::SelectOp>(
nestedLoc, predicate, newIndex, oldIndex);
nestedBuilder.create<linalg::YieldOp>(
nestedLoc, ValueRange({resultIndex, resultMax}));
});
if (didEncounterError)
return rewriter.notifyMatchFailure(
argmaxOp, "unsupported tosa.argmax element type");
rewriter.replaceOp(argmaxOp, linalgOp.getResult(0));
return success();
}
};
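// Lowers tosa.gather to a linalg.generic over the indices tensor whose body
// extracts the corresponding element from the values tensor via
// tensor.extract.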
class GatherConverter : public OpConversionPattern<tosa::GatherOp> {
public:
using OpConversionPattern<tosa::GatherOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(tosa::GatherOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
auto input = adaptor.getOperands()[0];
auto indices = adaptor.getOperands()[1];
auto inputTy = input.getType().cast<ShapedType>();
auto indicesTy = indices.getType().cast<ShapedType>();
auto resultTy = op.getType().cast<ShapedType>();
if (!inputTy.hasStaticShape() || !indicesTy.hasStaticShape())
return rewriter.notifyMatchFailure(
op, "require input type to have static shape");
auto resultElementTy = resultTy.getElementType();
auto loc = op.getLoc();
auto initTensor =
rewriter
.create<linalg::InitTensorOp>(loc, ArrayRef<Value>{},
resultTy.getShape(), resultElementTy)
.result();
SmallVector<AffineMap, 2> affineMaps = {
AffineMap::get(
/*dimCount=*/resultTy.getRank(), /*symbolCount=*/0,
{rewriter.getAffineDimExpr(0), rewriter.getAffineDimExpr(1)},
rewriter.getContext()),
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, ArrayRef<Type>({resultTy}), ValueRange{indices},
ValueRange{initTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &b, Location loc, ValueRange args) {
auto indexValue = args[0];
auto index0 = rewriter.create<linalg::IndexOp>(loc, 0);
Value index1 = rewriter.create<arith::IndexCastOp>(
loc, rewriter.getIndexType(), indexValue);
auto index2 = rewriter.create<linalg::IndexOp>(loc, 2);
Value extract = rewriter.create<tensor::ExtractOp>(
loc, input, ValueRange{index0, index1, index2});
rewriter.create<linalg::YieldOp>(loc, extract);
});
rewriter.replaceOp(op, genericOp.getResult(0));
return success();
}
};
// Lowers the TableOp to a series of gathers and numeric operations. This
// includes interpolation between the high/low values. For the I8 variant, this
// simplifies to a single gather operation.
class TableConverter : public OpRewritePattern<tosa::TableOp> {
public:
using OpRewritePattern<tosa::TableOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::TableOp op,
PatternRewriter &rewriter) const final {
auto loc = op.getLoc();
Value input = op.input();
Value table = op.table();
auto inputTy = input.getType().cast<ShapedType>();
auto tableTy = table.getType().cast<ShapedType>();
auto resultTy = op.getType().cast<ShapedType>();
if (!inputTy.hasStaticShape())
return rewriter.notifyMatchFailure(
op, "require input type to have static shape");
auto inputElementTy = inputTy.getElementType();
auto tableElementTy = tableTy.getElementType();
auto resultElementTy = resultTy.getElementType();
auto initTensor =
rewriter
.create<linalg::InitTensorOp>(loc, ArrayRef<Value>{},
resultTy.getShape(), resultElementTy)
.result();
SmallVector<AffineMap, 2> affineMaps = {
rewriter.getMultiDimIdentityMap(resultTy.getRank()),
rewriter.getMultiDimIdentityMap(resultTy.getRank())};
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, resultTy, ValueRange({input}), ValueRange{initTensor}, affineMaps,
getNParallelLoopsAttrs(resultTy.getRank()));
rewriter.replaceOp(op, genericOp.getResult(0));
{
OpBuilder::InsertionGuard regionGuard(rewriter);
Block *block =
rewriter.createBlock(&genericOp.region(), genericOp.region().end(),
TypeRange({inputElementTy, resultElementTy}));
auto inputValue = block->getArgument(0);
rewriter.setInsertionPointToStart(block);
if (inputElementTy.isInteger(8) && tableElementTy.isInteger(8) &&
resultElementTy.isInteger(8)) {
Value index = rewriter.create<arith::IndexCastOp>(
loc, rewriter.getIndexType(), inputValue);
Value offset = rewriter.create<arith::ConstantIndexOp>(loc, 128);
index = rewriter.create<arith::AddIOp>(loc, rewriter.getIndexType(),
index, offset);
Value extract =
rewriter.create<tensor::ExtractOp>(loc, table, ValueRange{index});
rewriter.create<linalg::YieldOp>(loc, extract);
return success();
}
if (inputElementTy.isInteger(16) && tableElementTy.isInteger(16) &&
resultElementTy.isInteger(32)) {
Value extend = rewriter.create<arith::ExtSIOp>(
loc, rewriter.getI32Type(), inputValue);
auto offset = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(32768));
auto seven = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(7));
auto one = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(1));
auto b1111111 = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(127));
// Compute the index and fractional part from the input value:
// value = value + 32768
// index = value >> 7;
// fraction = 0x7F & value
auto extendAdd = rewriter.create<arith::AddIOp>(loc, extend, offset);
Value index = rewriter.create<arith::ShRUIOp>(loc, extendAdd, seven);
Value fraction =
rewriter.create<arith::AndIOp>(loc, extendAdd, b1111111);
// Extract the base and next values from the table.
// base = (int32_t) table[index];
// next = (int32_t) table[index + 1];
Value indexPlusOne = rewriter.create<arith::AddIOp>(loc, index, one);
index = rewriter.create<arith::IndexCastOp>(
loc, rewriter.getIndexType(), index);
indexPlusOne = rewriter.create<arith::IndexCastOp>(
loc, rewriter.getIndexType(), indexPlusOne);
Value base =
rewriter.create<tensor::ExtractOp>(loc, table, ValueRange{index});
Value next = rewriter.create<tensor::ExtractOp>(
loc, table, ValueRange{indexPlusOne});
base =
rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), base);
next =
rewriter.create<arith::ExtSIOp>(loc, rewriter.getI32Type(), next);
// Use the fractional part to interpolate between the input values:
// result = (base << 7) + (next - base) * fraction
Value baseScaled = rewriter.create<arith::ShLIOp>(loc, base, seven);
Value diff = rewriter.create<arith::SubIOp>(loc, next, base);
Value diffScaled = rewriter.create<arith::MulIOp>(loc, diff, fraction);
Value result =
rewriter.create<arith::AddIOp>(loc, baseScaled, diffScaled);
rewriter.create<linalg::YieldOp>(loc, result);
return success();
}
}
return rewriter.notifyMatchFailure(
op, "unable to create body for tosa.table op");
}
};
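// Lowers tosa.max_pool2d to linalg.pooling_nhwc_max over the (optionally
// padded) input, with the init tensor filled with the minimum representable
// value of the element type.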
class MaxPool2dConverter : public OpRewritePattern<tosa::MaxPool2dOp> {
public:
using OpRewritePattern<tosa::MaxPool2dOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::MaxPool2dOp op,
PatternRewriter &rewriter) const final {
Location loc = op.getLoc();
Value input = op.input();
ShapedType inputTy = input.getType().cast<ShapedType>();
ShapedType resultTy = op.getType().template cast<ShapedType>();
Type resultETy = inputTy.getElementType();
if (!inputTy.hasStaticShape())
return failure();
// Determine what the initial value needs to be for the max pool op.
Attribute initialAttr;
if (resultETy.isF32())
initialAttr = rewriter.getFloatAttr(
resultETy,
APFloat::getLargest(resultETy.cast<FloatType>().getFloatSemantics(),
true));
if (resultETy.isa<IntegerType>())
initialAttr = rewriter.getIntegerAttr(
resultETy,
APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth()));
if (!initialAttr)
return rewriter.notifyMatchFailure(
op, "Unsupported initial value for tosa.maxpool_2d op");
// Apply padding as necessary.
llvm::SmallVector<int64_t> pad;
pad.resize(2, 0);
getValuesFromIntArrayAttribute(op.pad(), pad);
pad.resize(pad.size() + 2, 0);
Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter);
Value initialValue = rewriter.create<arith::ConstantOp>(loc, initialAttr);
SmallVector<int64_t> kernel, stride;
getValuesFromIntArrayAttribute(op.kernel(), kernel);
getValuesFromIntArrayAttribute(op.stride(), stride);
Attribute strideAttr = rewriter.getI64VectorAttr(stride);
Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
// Create the linalg op that performs pooling.
Value initTensor = rewriter.create<linalg::InitTensorOp>(
loc, resultTy.getShape(), resultTy.getElementType());
Value filledInitTensor =
rewriter.create<linalg::FillOp>(loc, initialValue, initTensor).result();
Value fakeWindowDims =
rewriter.create<linalg::InitTensorOp>(loc, kernel, resultETy);
rewriter.replaceOpWithNewOp<linalg::PoolingNhwcMaxOp>(
op, ArrayRef<Type>{resultTy}, ValueRange{paddedInput, fakeWindowDims},
filledInitTensor, strideAttr, dilationAttr);
return success();
}
};
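// Lowers tosa.avg_pool2d to a linalg.pooling_nhwc_sum followed by a
// linalg.generic that divides each summed window by the number of valid
// (non-padding) elements it covers, handling quantized normalization when
// quantization info is present.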
class AvgPool2dConverter : public OpRewritePattern<tosa::AvgPool2dOp> {
public:
using OpRewritePattern<tosa::AvgPool2dOp>::OpRewritePattern;
LogicalResult matchAndRewrite(tosa::AvgPool2dOp op,
PatternRewriter &rewriter) const final {
Location loc = op.getLoc();
Value input = op.input();
ShapedType inputTy = input.getType().cast<ShapedType>();
Type inElementTy = inputTy.getElementType();
ShapedType resultTy = op.getType().template cast<ShapedType>();
Type resultETy = op.getType().cast<ShapedType>().getElementType();
Type accETy =
inElementTy.isa<IntegerType>() ? rewriter.getI32Type() : inElementTy;
ShapedType accTy = resultTy.clone(accETy);
if (!inputTy.hasStaticShape())
return failure();
// Apply padding as necessary.
llvm::SmallVector<int64_t> pad;
pad.resize(2, 0);
getValuesFromIntArrayAttribute(op.pad(), pad);
pad.resize(pad.size() + 2, 0);
Attribute padAttr = rewriter.getZeroAttr(inElementTy);
Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter);
Attribute initialAttr = rewriter.getZeroAttr(accETy);
Value initialValue = rewriter.create<arith::ConstantOp>(loc, initialAttr);
SmallVector<int64_t> kernel, stride;
getValuesFromIntArrayAttribute(op.kernel(), kernel);
getValuesFromIntArrayAttribute(op.stride(), stride);
Attribute strideAttr = rewriter.getI64VectorAttr(stride);
Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});
// Create the linalg op that performs pooling.
Value poolInitTensor =
rewriter.create<linalg::InitTensorOp>(loc, accTy.getShape(), accETy);
Value filledInitTensor =
rewriter.create<linalg::FillOp>(loc, initialValue, poolInitTensor)
.result();
Value fakeWindowDims =
rewriter.create<linalg::InitTensorOp>(loc, kernel, accETy);
// Sum across the pooled region.
Value poolingOp = rewriter
.create<linalg::PoolingNhwcSumOp>(
loc, ArrayRef<Type>{accTy},
ValueRange{paddedInput, fakeWindowDims},
filledInitTensor, strideAttr, dilationAttr)
.getResult(0);
// Normalize the summed value by the number of elements grouped in each
// pool.
auto poolingOpTy = poolingOp.getType().cast<ShapedType>();
auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank());
Value genericInitTensor = rewriter.create<linalg::InitTensorOp>(
loc, resultTy.getShape(), resultETy);
auto genericOp = rewriter.create<linalg::GenericOp>(
loc, ArrayRef<Type>({resultTy}), ValueRange{poolingOp},
ValueRange{genericInitTensor},
ArrayRef<AffineMap>({affineMap, affineMap}),
getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &b, Location loc, ValueRange args) {
auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
auto one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
auto iH = rewriter.create<arith::ConstantIndexOp>(
loc, poolingOpTy.getDimSize(1) - 1);
auto iW = rewriter.create<arith::ConstantIndexOp>(
loc, poolingOpTy.getDimSize(2) - 1);
// Compute the indices from either end.
auto y0 = rewriter.create<linalg::IndexOp>(loc, 1);
auto x0 = rewriter.create<linalg::IndexOp>(loc, 2);
auto y1 = rewriter.create<arith::SubIOp>(loc, iH, y0);
auto x1 = rewriter.create<arith::SubIOp>(loc, iW, x0);
// Determine what portion of the valid input is covered by the
// kernel.
auto padFn = [&](Value v, Value x, int64_t pad) -> Value {
if (pad == 0)
return v;
auto padVal = rewriter.create<arith::ConstantIndexOp>(loc, pad);
Value dx = rewriter.create<arith::SubIOp>(loc, x, padVal);
Value cmp = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::slt, dx, zero);
Value offset = rewriter.create<mlir::SelectOp>(loc, cmp, dx, zero);
return rewriter.create<arith::AddIOp>(loc, v, offset)->getResult(0);
};
// Compute the vertical component of coverage.
auto kH0 = rewriter.create<arith::ConstantIndexOp>(loc, kernel[0]);
auto kH1 = padFn(kH0, y0, pad[2]);
auto kH2 = padFn(kH1, y1, pad[3]);
auto kHCmp = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::slt, kH2, one);
auto kH3 = rewriter.create<SelectOp>(loc, kHCmp, one, kH2);
// Compute the horizontal component of coverage.
auto kW0 = rewriter.create<arith::ConstantIndexOp>(loc, kernel[1]);
auto kW1 = padFn(kW0, x0, pad[4]);
auto kW2 = padFn(kW1, x1, pad[5]);
auto kWCmp = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::slt, kW2, one);
auto kW3 = rewriter.create<SelectOp>(loc, kWCmp, one, kW2);
// Compute the total number of elements and normalize.
Value count = rewriter.create<arith::MulIOp>(loc, kH3, kW3);
auto countI = rewriter.create<arith::IndexCastOp>(
loc, rewriter.getI32Type(), count);
// Divide by the number of summed values. For floats this is just a
// division; however, for quantized values input normalization must
// be applied.
Value poolVal = args[0];
if (accETy.isa<FloatType>()) {
auto countF = rewriter.create<arith::SIToFPOp>(loc, accETy, countI);
poolVal = rewriter.create<arith::DivFOp>(loc, poolVal, countF)
->getResult(0);
} else {
// If we have quantization information we need to apply an offset
// for the input zp value.
if (op.quantization_info()) {
auto quantizationInfo = op.quantization_info().getValue();
auto inputZp = rewriter.create<arith::ConstantOp>(
loc, quantizationInfo.input_zp());
Value offset =
rewriter.create<arith::MulIOp>(loc, accETy, countI, inputZp);
poolVal =
rewriter.create<arith::SubIOp>(loc, accETy, poolVal, offset);
}
// Compute the multiplier and shift values for the quantization
// normalization. Preferably we would compute with more bits; however,
// 32 bits should be enough for this computation. Honestly, we should
// probably just divide directly.
int64_t numerator = ((1 << 30) + 1);
int64_t shift = 30;
Value numeratorVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI32IntegerAttr(numerator));
Value multiplierVal =
rewriter
.create<arith::DivUIOp>(loc, rewriter.getI32Type(),
numeratorVal, countI)
.getResult();
Value shiftVal = rewriter.create<arith::ConstantOp>(
loc, rewriter.getI8IntegerAttr(shift));
auto scaled =
rewriter
.create<tosa::ApplyScaleOp>(
loc, rewriter.getI32Type(), poolVal, multiplierVal,
shiftVal, rewriter.getBoolAttr(false))
.getResult();
// If we have quantization information we need to apply the output
// zero point.
if (op.quantization_info()) {
auto quantizationInfo = op.quantization_info().getValue();
auto outputZp = rewriter.create<arith::ConstantOp>(
loc, quantizationInfo.output_zp());
scaled = rewriter.create<arith::AddIOp>(loc, scaled, outputZp)
.getResult();
}
// Apply Clip.
int64_t outBitwidth = resultETy.getIntOrFloatBitWidth();
auto min = rewriter.create<arith::ConstantIntOp>(
loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(),
accETy);
auto max = rewriter.create<arith::ConstantIntOp>(
loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(),
accETy);
auto clamp = clampHelper<arith::CmpIOp>(
loc, scaled, min, max, arith::CmpIPredicate::slt, rewriter);
poolVal = clamp;
// Convert type.
if (resultETy != clamp.getType()) {
poolVal =
rewriter.create<arith::TruncIOp>(loc, resultETy, poolVal);
}
}
rewriter.create<linalg::YieldOp>(loc, poolVal);
});
rewriter.replaceOp(op, genericOp.getResult(0));
return success();
}
};
} // namespace
void mlir::tosa::populateTosaToLinalgConversionPatterns(
RewritePatternSet *patterns) {
patterns->add<
// clang-format off
PointwiseConverter<tosa::AddOp>,
PointwiseConverter<tosa::SubOp>,
PointwiseConverter<tosa::MulOp>,
PointwiseConverter<tosa::DivOp>,
PointwiseConverter<tosa::NegateOp>,
PointwiseConverter<tosa::PowOp>,
PointwiseConverter<tosa::ReciprocalOp>,
PointwiseConverter<tosa::RsqrtOp>,
PointwiseConverter<tosa::LogOp>,
PointwiseConverter<tosa::ExpOp>,
PointwiseConverter<tosa::AbsOp>,
PointwiseConverter<tosa::TanhOp>,
PointwiseConverter<tosa::BitwiseAndOp>,
PointwiseConverter<tosa::BitwiseOrOp>,
PointwiseConverter<tosa::BitwiseNotOp>,
PointwiseConverter<tosa::BitwiseXorOp>,
PointwiseConverter<tosa::LogicalAndOp>,
PointwiseConverter<tosa::LogicalNotOp>,
PointwiseConverter<tosa::LogicalOrOp>,
PointwiseConverter<tosa::LogicalXorOp>,
PointwiseConverter<tosa::CastOp>,
PointwiseConverter<tosa::LogicalLeftShiftOp>,
PointwiseConverter<tosa::LogicalRightShiftOp>,
PointwiseConverter<tosa::ArithmeticRightShiftOp>,
PointwiseConverter<tosa::ClzOp>,
PointwiseConverter<tosa::SelectOp>,
PointwiseConverter<tosa::GreaterOp>,
PointwiseConverter<tosa::GreaterEqualOp>,
PointwiseConverter<tosa::EqualOp>,
PointwiseConverter<tosa::MaximumOp>,
PointwiseConverter<tosa::MinimumOp>,
PointwiseConverter<tosa::CeilOp>,
PointwiseConverter<tosa::FloorOp>,
PointwiseConverter<tosa::ClampOp>,
PointwiseConverter<tosa::ReluNOp>,
PointwiseConverter<tosa::SigmoidOp>,
IdentityNConverter<tosa::IdentityOp>,
ReduceConverter<tosa::ReduceAllOp>,
ReduceConverter<tosa::ReduceAnyOp>,
ReduceConverter<tosa::ReduceMinOp>,
ReduceConverter<tosa::ReduceMaxOp>,
ReduceConverter<tosa::ReduceSumOp>,
ReduceConverter<tosa::ReduceProdOp>,
ArgMaxConverter,
ConcatConverter,
ConvConverter,
DepthwiseConvConverter,
GatherConverter,
PadConverter,
ReshapeConverterCollapse,
ReshapeConverterExpand,
ReshapeConverterCollapseExpand,
RescaleConverter,
ResizeConverter,
ReverseConverter,
TableConverter,
TileConverter,
TransposeConverter,
MatMulConverter,
MaxPool2dConverter,
AvgPool2dConverter,
FullyConnectedConverter>(patterns->getContext());
// clang-format on
}