//===- HoistPadding.cpp - Hoisting transformation for PadTensorOp ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements functions concerned with hoisting padding operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SCF/Utils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"

using llvm::dbgs;

#define DEBUG_TYPE "hoist-padding"

#define DBGS() (dbgs() << '[' << DEBUG_TYPE << "] ")

using namespace mlir;
using namespace mlir::linalg;
/// Analysis class to support PadTensorOp hoisting across multiple enclosing
/// loops. The failure conditions are:
///   1. Pad op has a use that is not an input of a LinalgOp.
///   2. Pad op does not have a constant padding value.
///   3. There is no immediately enclosing scf::ForOp.
///   4. The backward slice from the pad op to the scf::ForOp to hoist above
///      contains an unknown op with non-index-type operands, a region, or a
///      memory effect.
///   5. The backward slice from the pad op to the scf::ForOp to hoist above is
///      empty.
///   6. The source tensor of the pad op is not defined by an extract slice op.
///   7. The source tensor of the extract slice op is not defined outside of
///      the outermost enclosing scf::ForOp.
///   8. There is no enclosing scf::ForOp that indexes the padded data.
/// Other cases succeed and will trigger hoisting of the pad op.
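///
/// An illustrative sketch of the transformation (not verbatim output: the
/// actual IR also contains the cloned index computations, and the map #it that
/// computes the iteration number is elided):
/// ```
/// scf.for %i = %lb to %ub step %step {
///   %slice = tensor.extract_slice %source[%i] [%sz] [1]
///   %padded = linalg.pad_tensor %slice ... : tensor<?xf32> to tensor<4xf32>
///   // %padded is used as an input of a LinalgOp.
/// }
/// ```
/// hoisted by one loop becomes:
/// ```
/// %init = linalg.init_tensor [%n, 4] : tensor<?x4xf32>
/// %packed = scf.for %i = %lb to %ub step %step
///     iter_args(%p = %init) -> tensor<?x4xf32> {
///   %slice = tensor.extract_slice %source[%i] [%sz] [1]
///   %padded = linalg.pad_tensor %slice ... : tensor<?xf32> to tensor<4xf32>
///   %count = affine.apply #it(%i)
///   %ins = tensor.insert_slice %padded into %p[%count, 0] [1, 4] [1, 1]
///   scf.yield %ins : tensor<?x4xf32>
/// }
/// scf.for %i = %lb to %ub step %step {
///   %count = affine.apply #it(%i)
///   %padded = tensor.extract_slice %packed[%count, 0] [1, 4] [1, 1]
///       : tensor<?x4xf32> to tensor<4xf32>
///   // %padded is used as an input of a LinalgOp.
/// }
/// ```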
struct HoistingAnalysis {
  HoistingAnalysis(PadTensorOp padTensorOp, int numLoops);

  bool isValid() { return valid; }

  /// Footprint of the packedTensor, computed from the packingLoops.
  SmallVector<Value> getPackedTensorSizes(ImplicitLocOpBuilder &b);

  /// The outermost loop, determined by `numLoops`, above which `padTensorOp`
  /// will be hoisted.
  scf::ForOp outermostEnclosingForOp;

  /// Backward slice rooted at `padTensorOp` and nested under
  /// `outermostEnclosingForOp`.
  SetVector<Operation *> backwardSlice;

  /// The scf::ForOps enclosing `padTensorOp` such that:
  ///   1. they are nested under `outermostEnclosingForOp` (inclusive), and
  ///   2. their induction variables are used, directly or indirectly, in the
  ///      computation of `padTensorOp`.
  /// The span of these loops determines the footprint of the packed tensor.
  SmallVector<scf::ForOp> packingLoops;

private:
  /// Drop any non-index dependencies of `padTensorOp` and `sliceOp` from
  /// `backwardSlice`. The method follows the use-def chains of the index
  /// operands consumed by `padTensorOp` and `sliceOp` and drops the operations
  /// not part of this index computation. Afterwards, the filtered
  /// `backwardSlice` contains only the loops whose induction variable is used,
  /// directly or indirectly, to index the padded tensor. The method returns
  /// failure if the filtered backward slice contains an unexpected operation.
  ///
  /// Example:
  /// ```
  /// %source = linalg.fill(%cst, %arg0)
  /// scf.for %i
  ///   %unrelated = linalg.fill(%cst, %arg1)    // not used to index %source!
  ///   scf.for %j (%arg2 = %unrelated)
  ///     scf.for %k                             // not used to index %source!
  ///       %ubi = affine.min #map(%i)
  ///       %ubj = affine.min #map(%j)
  ///       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
  ///       %padded_slice = linalg.pad_tensor %slice
  /// ```
  /// dropNonIndexDependencies(%padded_slice, %slice)
  /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
  LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp,
                                         tensor::ExtractSliceOp sliceOp);

  /// Encodes whether the analysis is valid and hoisting can proceed.
  bool valid;
};
/// Return true if all uses of `padTensorOp` are input tensors of some
/// LinalgOp.
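///
/// For example (an illustrative sketch), hoisting can proceed when the pad
/// result only feeds LinalgOp inputs:
/// ```
/// %0 = linalg.pad_tensor %slice ... : tensor<?x?xf32> to tensor<4x8xf32>
/// %3 = linalg.matmul ins(%0, %1 : tensor<4x8xf32>, tensor<8x16xf32>)
///                    outs(%2 : tensor<4x16xf32>) -> tensor<4x16xf32>
/// ```
/// but not if %0 also has a non-input use, e.g., as the source of a
/// tensor.insert_slice or as a LinalgOp output.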
static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) {
  for (OpOperand &use : padTensorOp.result().getUses()) {
    auto linalgUser = dyn_cast<linalg::LinalgOp>(use.getOwner());
    if (!linalgUser || !linalgUser.isInputTensor(&use)) {
      LLVM_DEBUG(DBGS() << "Found a use of " << *(padTensorOp)
                        << "\nthat is not an input tensor of a LinalgOp, "
                        << "cannot hoist\n"
                        << *(use.getOwner()) << "\n");
      return false;
    }
  }
  return true;
}

/// Return at most `nLevels` of immediately enclosing scf::ForOp loops.
/// Stops at the first parent that is not an scf::ForOp.
/// Multi-loop constructs such as scf.parallel or linalg.tiled_loop are not
/// modeled at the moment, and neither are control-flow and other ops that
/// contain regions.
static void
getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
                         SmallVector<scf::ForOp> &reverseEnclosingLoops) {
  AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
  (void)state;
  scf::ForOp outermostEnclosingForOp = nullptr;
  Operation *nextEnclosingOp = padTensorOp->getParentOp();
  while (nLevels-- > 0 &&
         (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
    LLVM_DEBUG(
        DBGS() << "loops: ";
        outermostEnclosingForOp.getInductionVar().printAsOperand(dbgs(), state);
        dbgs() << "\n");
    reverseEnclosingLoops.push_back(outermostEnclosingForOp);
    nextEnclosingOp = outermostEnclosingForOp->getParentOp();
  }
}

HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
  valid = false;

  // Bail on any use that isn't an input of a Linalg op.
  // Hoisting of inplace updates happens after vectorization.
  if (!isOnlyUsedAsInputOfLinalgOp(padTensorOp))
    return;

  // Get at most `numLoops` of immediately enclosing loops.
  SmallVector<scf::ForOp> reverseEnclosingLoops;
  getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops);
  if (reverseEnclosingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n");
    return;
  }

  outermostEnclosingForOp = reverseEnclosingLoops.back();

  // Get the `sliceOp` that defines the source tensor of `padTensorOp` and
  // check its source is defined outside of the outermost loop. This check
  // ensures the padded data is available for packing before entering the
  // outermost enclosing loop.
  //
  // Example:
  // ```
  // %source = linalg.fill(%cst, %arg0)
  // // %source is available for packing here!
  // scf.for %i
  //   scf.for %j
  //     scf.for %k
  //       %slice = tensor.extract_slice %source [%i, %j]
  //       %padded_slice = linalg.pad_tensor %slice
  // ```
  auto sliceOp = padTensorOp.source().getDefiningOp<tensor::ExtractSliceOp>();
  if (!sliceOp) {
    LLVM_DEBUG(DBGS() << "Cannot find the extract slice op -> skip\n");
    return;
  }
  if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.source())) {
    LLVM_DEBUG(DBGS() << "Source not defined outside of loops -> skip\n");
    return;
  }

  // Check that the padding region of `padTensorOp` depends on a constant only.
  // Adding hoisting support for arbitrary padding regions would require
  // cloning all dependencies captured by the padding region.
  Value paddingValue = padTensorOp.getConstantPaddingValue();
  if (!paddingValue ||
      !isa_and_nonnull<arith::ConstantOp>(paddingValue.getDefiningOp())) {
    LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> skip\n");
    return;
  }
  // Get all the ops in the backward slice starting from `padTensorOp` that
  // are dominated by the outermost enclosing loop.
  DominanceInfo domInfo(outermostEnclosingForOp);
  getBackwardSlice(padTensorOp.getOperation(), &backwardSlice,
                   [&](Operation *op) {
                     return domInfo.dominates(outermostEnclosingForOp, op);
                   });
  if (backwardSlice.empty())
    return;
  // Add `padTensorOp` itself to the backward slice.
  backwardSlice.insert(padTensorOp.getOperation());

  // Remove all ops in the backward slice that are not used to index the padded
  // tensor. In particular, keep `padTensorOp`, `sliceOp`, and the loop and
  // affine operations used for the index computation.
  if (failed(dropNonIndexDependencies(padTensorOp, sliceOp)))
    return;

  // Add only the loops that are part of the filtered `backwardSlice` to the
  // packing loops. All other loops are not used to index the padded data and
  // consequently access the same data in every loop iteration. Adding them to
  // the packing loops would increase the cache footprint of the packed data
  // by storing the same data multiple times.
  for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops))
    if (backwardSlice.contains(forOp))
      packingLoops.push_back(forOp);
  if (packingLoops.empty()) {
    LLVM_DEBUG(DBGS() << "Cannot find a packing loop -> skip\n");
    return;
  }

  // The analysis is valid and hoisting can occur.
  valid = true;
}

LogicalResult
HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp,
                                           tensor::ExtractSliceOp sliceOp) {
  // Set of all values used for index computation.
  SetVector<Value> indexEdges;

  // Add all index operands of `operation` to `indexEdges`. An index operand is
  // an operand of type index.
  auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
    for (Value operand : operation->getOperands())
      if (operand.getType().isIndex())
        indexEdges.insert(operand);
  };

  // Check if any operation result is contained in `indexEdges`.
  auto hasIndexResult = [&](Operation *operation) {
    return llvm::any_of(operation->getResults(), [&](Value result) {
      return indexEdges.contains(result);
    });
  };

  // Starting from `padTensorOp` and `sliceOp` walk the use-def edges of index
  // type in `backwardSlice`. Add the index operands of an operation to
  // `indexEdges` and remove all operations from `backwardSlice` that are not
  // part of the index computation.
  //
  // Example:
  // ```
  // %source = linalg.fill(%cst, %arg0)
  // scf.for %i
  //   %unrelated = linalg.fill(%cst, %arg1)    // not used to index %source!
  //   scf.for %j (%arg2 = %unrelated)
  //     scf.for %k                             // not used to index %source!
  //       %ubi = affine.min #map(%i)
  //       %ubj = affine.min #map(%j)
  //       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
  //       %padded_slice = linalg.pad_tensor %slice
  // ```
  // After iterating `backwardSlice` we obtain:
  // indexEdges = [%i, %j, %ubi, %ubj]
  // backwardSlice = backwardSlice / [linalg.fill(%cst, %arg1), scf.for %k]
  for (Operation *op : llvm::reverse(backwardSlice)) {
    // Add the index operands of `padTensorOp` and `sliceOp` to start the
    // exploration of the index computation.
    if (op == padTensorOp || op == sliceOp) {
      addIndexOperandsToIndexEdges(op);
      continue;
    }
    // Add the index operands of the loop if its induction variable is
    // used for index computation.
    if (auto forOp = dyn_cast<scf::ForOp>(op)) {
      if (!hasIndexResult(op) &&
          indexEdges.contains(forOp.getInductionVar())) {
        addIndexOperandsToIndexEdges(op);
        continue;
      }
    }
    // Add the index operands of all other operations if at least one result is
    // used for index computation.
    if (hasIndexResult(op)) {
      addIndexOperandsToIndexEdges(op);
      // Check that all operands of the remaining operations have index type.
      if (llvm::any_of(op->getOperandTypes(),
                       [](Type type) { return !type.isIndex(); })) {
        LLVM_DEBUG(DBGS() << "Unsupported op with non-index-type operands: "
                          << *op << " -> skip\n");
        return failure();
      }
      // Check that the remaining operations have no regions or memory effects.
      auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
      bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
      if (hasMemoryEffect || op->getNumRegions() != 0) {
        LLVM_DEBUG(DBGS() << "Unsupported op with region or memory effect: "
                          << *op << " -> skip\n");
        return failure();
      }
      continue;
    }
    // Remove all other operations that are not used by the index computation,
    // except for constant operations that may be padding values used by
    // `padTensorOp`.
    if (!isa<arith::ConstantOp>(op))
      backwardSlice.remove(op);
  }
  return success();
}

SmallVector<Value>
HoistingAnalysis::getPackedTensorSizes(ImplicitLocOpBuilder &b) {
  SmallVector<Value> dynamicTensorSizes;

  // Upper bound the packing loop lengths to size the packed tensor. Taking
  // upper bounds can make the sizes of the packed tensor independent of the
  // enclosing loops. This independence is a prerequisite for reusing the same
  // buffer for all enclosing loop iterations and hoisting its allocation out
  // of the enclosing loops.
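  //
  // For example (an illustrative sketch), given a packing loop
  //   scf.for %i = %c0 to %ub step %c4
  // whose upper bound %ub is itself bounded above by an affine.min expression,
  // the sequence built below resembles:
  //   %b = affine.min #bound(%d)
  //   %sz = affine.apply affine_map<(d0, d1)[s0] -> ((d1 - d0) ceildiv s0)>
  //             (%c0, %b)[%c4]
  // where %sz is a loop-independent upper bound on the iteration count.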
  for (auto forOp : packingLoops) {
    // Compute an upper bound `ubVal` for the upper bound of `forOp`.
    AffineMap boundMap;
    SmallVector<Value> boundOperands;
    getUpperBoundForIndex(forOp.upperBound(), boundMap, boundOperands);
    Value ubVal = b.createOrFold<AffineMinOp>(boundMap, boundOperands);
    // Compute the maximal packing loop length as (ub - lb).ceilDiv(step) and
    // store the result to `dynamicTensorSizes`.
    // TODO: instead of using the lower bound of `forOp` directly, implement a
    // lower bound computation similar to the upper bound computation.
    AffineExpr lb, ub, step;
    bindDims(b.getContext(), lb, ub);
    bindSymbols(b.getContext(), step);
    Value res = b.createOrFold<AffineApplyOp>(
        (ub - lb).ceilDiv(step),
        ValueRange{forOp.lowerBound(), ubVal, forOp.step()});
    dynamicTensorSizes.push_back(res);
  }

  return dynamicTensorSizes;
}

static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) {
  return outer.isDefinedOutsideOfLoop(v) ||
         v.getDefiningOp<arith::ConstantOp>();
}

/// Return the current iteration number in the loop (iv - lb).ceilDiv(step).
/// The returned Value is guaranteed not to depend on any loop contained in
/// [`outer`, `forOp`].
/// Return null if such a loop-independent quantity cannot be computed.
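///
/// For example (illustrative), for `scf.for %i = %lb to %ub step %s` this
/// builds:
/// ```
/// %count = affine.apply affine_map<(d0, d1)[s0] -> ((d0 - d1) ceildiv s0)>
///              (%i, %lb)[%s]
/// ```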
static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
                                     scf::ForOp forOp) {
  MLIRContext *ctx = forOp->getContext();
  AffineExpr iv, lb, step;
  bindDims(ctx, iv, lb);
  bindSymbols(ctx, step);
  if (!isDefinedOutsideOrConstant(outer, forOp.lowerBound()) ||
      !isDefinedOutsideOrConstant(outer, forOp.step()))
    return Value();
  Value ivVal = forOp.getInductionVar(), lbVal = forOp.lowerBound(),
        stepVal = forOp.step();
  auto loc = forOp->getLoc();
  return b.createOrFold<AffineApplyOp>(loc, (iv - lb).ceilDiv(step),
                                       ValueRange{ivVal, lbVal, stepVal});
}

FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
                                                     int numLoops,
                                                     PadTensorOp &hoistedOp) {
  LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops
                    << " loops\n");
  HoistingAnalysis analysis(opToHoist, numLoops);
  if (!analysis.isValid()) {
    LLVM_DEBUG(DBGS() << "Analysis failed -> Skip\n");
    return failure();
  }

  scf::ForOp outer = analysis.outermostEnclosingForOp;
  ImplicitLocOpBuilder b(outer->getLoc(), outer);

  SmallVector<Value> dynamicTensorSizes = analysis.getPackedTensorSizes(b);

  // The actual number of hoisted loops may be smaller than `numLoops`: it is
  // the number of packing loops found by the analysis.
  int nPackedLoops = analysis.packingLoops.size();

  Location loc = opToHoist->getLoc();
  RankedTensorType paddedTensorType = opToHoist.getResultType();
  int paddedRank = paddedTensorType.getRank();

  // Create the packed tensor<?x?x..?xpadded_shape> into which we amortize
  // padding.
  SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamicSize);
  // TODO: go grab dims when necessary, for now PadTensorOp returns a static
  // tensor.
  llvm::append_range(packedShape, paddedTensorType.getShape());
  auto packedTensorType =
      RankedTensorType::get(packedShape, paddedTensorType.getElementType());
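  // For example (illustrative), with 2 packing loops and a padded type
  // tensor<4x8xf32>, `packedTensorType` is tensor<?x?x4x8xf32> and the two
  // dynamic dimensions are sized by `dynamicTensorSizes`.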
  Value packedTensor = b.create<linalg::InitTensorOp>(
      loc, dynamicTensorSizes, packedTensorType.getShape(),
      packedTensorType.getElementType());

  // Clone the operations involved in the backward slice, iteratively stepping
  // into the loops that we encounter.
  // The implementation proceeds in a stack-like fashion:
  //   1. Iteratively clone and step into the loops, pushing the `packedTensor`
  //      deeper in the stack.
  //   2. Create an InsertSliceOp at the top of the stack.
  //   3. Iteratively pop and yield the result of the InsertSliceOp across
  //      the cloned loops.
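  //
  // For example (an illustrative sketch), with two packing loops the cloned
  // nest built below has the shape:
  //   %r0 = scf.for ... iter_args(%p0 = %packedTensor) {
  //     %r1 = scf.for ... iter_args(%p1 = %p0) {
  //       ...
  //       %inserted = tensor.insert_slice %padded into %p1 [...]
  //       scf.yield %inserted : ...
  //     }
  //     scf.yield %r1 : ...
  //   }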
  SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
  clonedLoopIvs.reserve(nPackedLoops);
  leadingPackedTensorIndexings.reserve(nPackedLoops);
  BlockAndValueMapping bvm;
  // Stack step 1. iteratively clone loops and push `packedTensor`.
  for (Operation *op : analysis.backwardSlice) {
    // Specifically skip the extract_slice(packedTensor) case: this is the
    // very op we seek to replace.
    if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op))
      if (bvm.lookupOrDefault(sliceOp.source()) == packedTensor)
        continue;
    // Clone all operations except loops, which are handled specially below.
    auto forOp = dyn_cast<scf::ForOp>(op);
    if (!forOp) {
      b.clone(*op, bvm);
      continue;
    }
    // Create a packing loop that takes `packedTensor` as iteration argument.
    auto clonedForOp =
        b.create<scf::ForOp>(loc, bvm.lookupOrDefault(forOp.lowerBound()),
                             bvm.lookupOrDefault(forOp.upperBound()),
                             bvm.lookupOrDefault(forOp.step()), packedTensor);
    // Map the induction var, region args and results to the `clonedForOp`.
    bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
    bvm.map(forOp.getResults(), clonedForOp.getResults());
    assert(clonedForOp->getNumRegions() == 1);
    clonedLoopIvs.push_back(clonedForOp.getInductionVar());

    b.setInsertionPointToStart(&clonedForOp->getRegion(0).front());
    Value loopIndependentIterationCount =
        buildLoopIterationCount(b, outer, clonedForOp);
    // Assert the loop-independent iteration count can be computed.
    if (!loopIndependentIterationCount)
      llvm_unreachable("loop independence prerequisite not met");
    leadingPackedTensorIndexings.push_back(loopIndependentIterationCount);
    packedTensor = clonedForOp.getRegionIterArgs().front();
  }

  // Stack step 2. create InsertSliceOp at the top of the stack.
  // offsets = [leadingPackedTensorIndexings, 0 .. 0].
  SmallVector<OpFoldResult> offsets(leadingPackedTensorIndexings.begin(),
                                    leadingPackedTensorIndexings.end());
  offsets.append(paddedRank, b.getIndexAttr(0));
  // sizes = [1 .. 1, paddedShape].
  SmallVector<OpFoldResult> sizes(nPackedLoops, b.getIndexAttr(1));
  for (int64_t sz : paddedTensorType.getShape()) {
    // TODO: go grab dims when necessary, for now PadTensorOp returns a static
    // tensor.
    assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes");
    sizes.push_back(b.getIndexAttr(sz));
  }
  // strides = [1 .. 1].
  SmallVector<OpFoldResult> strides(nPackedLoops + paddedRank,
                                    b.getIndexAttr(1));

  Value inserted =
      b.create<tensor::InsertSliceOp>(loc, bvm.lookup(opToHoist.result()),
                                      packedTensor, offsets, sizes, strides);

  // Stack step 3. iteratively pop the stack and propagate the yield.
  Value valueToYield = inserted;
  for (Value iv : llvm::reverse(clonedLoopIvs)) {
    auto forOp = scf::getForInductionVarOwner(iv);
    b.setInsertionPointToEnd(&forOp.getRegion().front());
    b.create<scf::YieldOp>(loc, valueToYield);
    valueToYield = forOp.getResult(0);
  }

  // Now the packed tensor is ready, replace the original padding op by a
  // 1x..x1 slice [loopIterationCounts, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
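  // For example (illustrative), with one packing loop and a padded type
  // tensor<4x8xf32>, the replacement built below resembles:
  //   %count = affine.apply #it(%i)  // iteration number of the original loop
  //   %res = tensor.extract_slice %packed[%count, 0, 0] [1, 4, 8] [1, 1, 1]
  //       : tensor<?x4x8xf32> to tensor<4x8xf32>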
  b.setInsertionPoint(opToHoist);
  SmallVector<Value> loopIterationCounts = llvm::to_vector<4>(
      llvm::map_range(analysis.packingLoops, [&](Operation *loop) {
        return buildLoopIterationCount(b, outer, cast<scf::ForOp>(loop));
      }));
  // Assert all loop iteration counts can be computed.
  if (llvm::any_of(loopIterationCounts, [](Value v) { return !v; }))
    llvm_unreachable("loop independence prerequisite not met");
  // offsets = [loopIterationCounts, 0 .. 0].
  offsets.assign(loopIterationCounts.begin(), loopIterationCounts.end());
  offsets.append(paddedRank, b.getIndexAttr(0));
  // sizes = [1 .. 1, paddedShape] (defined above).
  // strides = [1 .. 1] (defined above).
  packedTensor =
      scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0);
  Value newResult = b.create<tensor::ExtractSliceOp>(
      loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides);

  // Make the newly cloned `opToHoist` available to the caller.
  hoistedOp =
      cast<PadTensorOp>(bvm.lookup(opToHoist.result()).getDefiningOp());
  return newResult;
}