| //===- ACCImplicitData.cpp ------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass implements the OpenACC specification for "Variables with |
| // Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2). |
| // |
| // Overview: |
| // --------- |
| // The pass automatically generates data clause operations for variables used |
| // within OpenACC compute constructs (parallel, kernels, serial) that do not |
| // already have explicit data clauses. The semantics follow these rules: |
| // |
| // 1. If there is a default(none) clause visible, no implicit data actions |
| // apply. |
| // |
| // 2. An aggregate variable (arrays, derived types, etc.) will be treated as: |
| // - In a present clause when default(present) is visible. |
| // - In a copy clause otherwise. |
| // |
| // 3. A scalar variable will be treated as if it appears in: |
| // - A copy clause if the compute construct is a kernels construct. |
| // - A firstprivate clause otherwise (parallel, serial). |
| // |
| // Requirements: |
| // ------------- |
| // To use this pass in a pipeline, the following requirements must be met: |
| // |
| // 1. Type Interface Implementation: Variables from the dialect being used |
| // must implement one or both of the following MLIR interfaces: |
| // `acc::MappableType` and/or `acc::PointerLikeType` |
| // |
| // These interfaces provide the necessary methods for the pass to: |
| // - Determine variable type categories (scalar vs. aggregate) |
| // - Generate appropriate bounds information |
| // - Generate privatization recipes |
| // |
| // 2. Operation Interface Implementation: Operations that access partial |
| // entities or create views should implement the following MLIR |
| // interfaces: `acc::PartialEntityAccess` and/or |
| // `mlir::ViewLikeOpInterface` |
| // |
| // These interfaces are used for proper data clause ordering, ensuring |
| // that base entities are mapped before derived entities (e.g., a |
| // struct is mapped before its fields, an array is mapped before |
| // subarray views). |
| // |
| // 3. Analysis Registration (Optional): If custom behavior is needed for |
| // variable name extraction or alias analysis, the dialect should |
| // pre-register the `acc::OpenACCSupport` and `mlir::AliasAnalysis` analyses. |
| // |
| // If not registered, default behavior will be used. |
| // |
| // Implementation Details: |
| // ----------------------- |
| // The pass performs the following operations: |
| // |
| // 1. Finds candidate variables which are live-in to the compute region and |
| // are not already in a data clause or private clause. |
| // |
| // 2. Generates both data "entry" and "exit" clause operations that match |
| // the intended action depending on variable type: |
| // - copy -> acc.copyin (entry) + acc.copyout (exit) |
| // - present -> acc.present (entry) + acc.delete (exit) |
| // - firstprivate -> acc.firstprivate (entry only, no exit) |
| // |
| // 3. Ensures that default clause is taken into consideration by looking |
| // through current construct and parent constructs to find the "visible |
| // default clause". |
| // |
| // 4. Fixes up SSA value links so that uses in the acc region reference the |
| // result of the newly created data clause operations. |
| // |
| // 5. When generating implicit data clause operations, it also adds variable |
| // name information and marks them with the implicit flag. |
| // |
| // 6. Recipes are generated by calling the appropriate entrypoints in the |
| // MappableType and PointerLikeType interfaces. |
| // |
| // 7. AliasAnalysis is used to determine if a variable is already covered by |
| // an existing data clause (e.g., an interior pointer covered by its parent). |
| // |
| // Examples: |
| // --------- |
| // |
| // Example 1: Scalar in parallel construct (implicit firstprivate) |
| // |
| // Before: |
| // func.func @test() { |
| // %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32> |
| // acc.parallel { |
| // %val = memref.load %scalar[] : memref<f32> |
| // acc.yield |
| // } |
| // } |
| // |
| // After: |
| // func.func @test() { |
| // %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32> |
| // %firstpriv = acc.firstprivate varPtr(%scalar : memref<f32>) |
| // -> memref<f32> {implicit = true, name = "x"} |
| // acc.parallel firstprivate(@recipe -> %firstpriv : memref<f32>) { |
| // %val = memref.load %firstpriv[] : memref<f32> |
| // acc.yield |
| // } |
| // } |
| // |
| // Example 2: Scalar in kernels construct (implicit copy) |
| // |
| // Before: |
| // func.func @test() { |
| // %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32> |
| // acc.kernels { |
| // %val = memref.load %scalar[] : memref<i32> |
| // acc.terminator |
| // } |
| // } |
| // |
| // After: |
| // func.func @test() { |
| // %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32> |
| // %copyin = acc.copyin varPtr(%scalar : memref<i32>) -> memref<i32> |
| // {dataClause = #acc<data_clause acc_copy>, |
| // implicit = true, name = "n"} |
| // acc.kernels dataOperands(%copyin : memref<i32>) { |
| // %val = memref.load %copyin[] : memref<i32> |
| // acc.terminator |
| // } |
| // acc.copyout accPtr(%copyin : memref<i32>) |
| // to varPtr(%scalar : memref<i32>) |
| // {dataClause = #acc<data_clause acc_copy>, |
| // implicit = true, name = "n"} |
| // } |
| // |
| // Example 3: Array (aggregate) in parallel (implicit copy) |
| // |
| // Before: |
| // func.func @test() { |
| // %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> |
| // acc.parallel { |
| // %c0 = arith.constant 0 : index |
| // %val = memref.load %array[%c0] : memref<100xf32> |
| // acc.yield |
| // } |
| // } |
| // |
| // After: |
| // func.func @test() { |
| // %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> |
| // %copyin = acc.copyin varPtr(%array : memref<100xf32>) |
| // -> memref<100xf32> |
| // {dataClause = #acc<data_clause acc_copy>, |
| // implicit = true, name = "arr"} |
| // acc.parallel dataOperands(%copyin : memref<100xf32>) { |
| // %c0 = arith.constant 0 : index |
| // %val = memref.load %copyin[%c0] : memref<100xf32> |
| // acc.yield |
| // } |
| // acc.copyout accPtr(%copyin : memref<100xf32>) |
| // to varPtr(%array : memref<100xf32>) |
| // {dataClause = #acc<data_clause acc_copy>, |
| // implicit = true, name = "arr"} |
| // } |
| // |
| // Example 4: Array with default(present) |
| // |
| // Before: |
| // func.func @test() { |
| // %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> |
| // acc.parallel { |
| // %c0 = arith.constant 0 : index |
| // %val = memref.load %array[%c0] : memref<100xf32> |
| // acc.yield |
| // } attributes {defaultAttr = #acc<defaultvalue present>} |
| // } |
| // |
| // After: |
| // func.func @test() { |
| // %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> |
| // %present = acc.present varPtr(%array : memref<100xf32>) |
| // -> memref<100xf32> |
| // {implicit = true, name = "arr"} |
| // acc.parallel dataOperands(%present : memref<100xf32>) |
| // attributes {defaultAttr = #acc<defaultvalue present>} { |
| // %c0 = arith.constant 0 : index |
| // %val = memref.load %present[%c0] : memref<100xf32> |
| // acc.yield |
| // } |
| // acc.delete accPtr(%present : memref<100xf32>) |
| // {dataClause = #acc<data_clause acc_present>, |
| // implicit = true, name = "arr"} |
| // } |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Dialect/OpenACC/Transforms/Passes.h" |
| #include "llvm/ADT/SmallVectorExtras.h" |
| |
| #include "mlir/Analysis/AliasAnalysis.h" |
| #include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h" |
| #include "mlir/Dialect/OpenACC/OpenACC.h" |
| #include "mlir/Dialect/OpenACC/OpenACCUtils.h" |
| #include "mlir/IR/Builders.h" |
| #include "mlir/IR/BuiltinOps.h" |
| #include "mlir/IR/Dominance.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/IR/Value.h" |
| #include "mlir/Interfaces/FunctionInterfaces.h" |
| #include "mlir/Interfaces/ViewLikeInterface.h" |
| #include "mlir/Transforms/RegionUtils.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/TypeSwitch.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include <type_traits> |
| |
| namespace mlir { |
| namespace acc { |
| #define GEN_PASS_DEF_ACCIMPLICITDATA |
| #include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc" |
| } // namespace acc |
| } // namespace mlir |
| |
| #define DEBUG_TYPE "acc-implicit-data" |
| |
| using namespace mlir; |
| |
| namespace { |
| |
| class ACCImplicitData : public acc::impl::ACCImplicitDataBase<ACCImplicitData> { |
| public: |
| using acc::impl::ACCImplicitDataBase<ACCImplicitData>::ACCImplicitDataBase; |
| |
| void runOnOperation() override; |
| |
| private: |
| /// Looks through the `dominatingDataClauses` to find the original data clause |
| /// op for an alias. Returns nullptr if no original data clause op is found. |
| template <typename OpT> |
| Operation *getOriginalDataClauseOpForAlias( |
| Value var, OpBuilder &builder, OpT computeConstructOp, |
| const SmallVector<Value> &dominatingDataClauses); |
| |
| /// Generates the appropriate `acc.copyin`, `acc.present`,`acc.firstprivate`, |
| /// etc. data clause op for a candidate variable. |
| template <typename OpT> |
| Operation *generateDataClauseOpForCandidate( |
| Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, |
| const SmallVector<Value> &dominatingDataClauses, |
| const std::optional<acc::ClauseDefaultValue> &defaultClause); |
| |
| /// Generates the implicit data ops for a compute construct. |
| template <typename OpT> |
| void |
| generateImplicitDataOps(ModuleOp &module, OpT computeConstructOp, |
| std::optional<acc::ClauseDefaultValue> &defaultClause, |
| acc::OpenACCSupport &accSupport); |
| |
| /// Generates a private recipe for a variable. |
| acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, Value var, |
| Location loc, OpBuilder &builder, |
| acc::OpenACCSupport &accSupport); |
| |
| /// Generates a firstprivate recipe for a variable. |
| acc::FirstprivateRecipeOp |
| generateFirstprivateRecipe(ModuleOp &module, Value var, Location loc, |
| OpBuilder &builder, |
| acc::OpenACCSupport &accSupport); |
| |
| /// Generates recipes for a list of variables. |
| void generateRecipes(ModuleOp &module, OpBuilder &builder, |
| Operation *computeConstructOp, |
| const SmallVector<Value> &newOperands); |
| }; |
| |
| /// Determines if a variable is a candidate for implicit data mapping. |
| /// Returns true if the variable is a candidate, false otherwise. |
| static bool isCandidateForImplicitData(Value val, Region &accRegion, |
| acc::OpenACCSupport &accSupport) { |
| // Ensure the variable is an allowed type for data clause. |
| if (!acc::isPointerLikeType(val.getType()) && |
| !acc::isMappableType(val.getType())) |
| return false; |
| |
| // If this is already coming from a data clause, we do not need to generate |
| // another. |
| if (isa_and_nonnull<ACC_DATA_ENTRY_OPS>(val.getDefiningOp())) |
| return false; |
| |
| // Device data is a candidate - it will get a deviceptr clause. |
| if (acc::isDeviceValue(val)) |
| return true; |
| |
| // If it is otherwise valid, skip it. |
| if (accSupport.isValidValueUse(val, accRegion)) |
| return false; |
| |
| return true; |
| } |
| |
| template <typename OpT> |
| Operation *ACCImplicitData::getOriginalDataClauseOpForAlias( |
| Value var, OpBuilder &builder, OpT computeConstructOp, |
| const SmallVector<Value> &dominatingDataClauses) { |
| auto &aliasAnalysis = this->getAnalysis<AliasAnalysis>(); |
| for (auto dataClause : dominatingDataClauses) { |
| if (auto *dataClauseOp = dataClause.getDefiningOp()) { |
| // Only accept clauses that guarantee that the alias is present. |
| if (isa<acc::CopyinOp, acc::CreateOp, acc::PresentOp, acc::NoCreateOp, |
| acc::DevicePtrOp>(dataClauseOp)) |
| if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust()) { |
| LLVM_DEBUG(llvm::dbgs() |
| << "Using existing data clause:\n\t" << *dataClauseOp |
| << "\n\tas reference when processing var:\n\t" << var |
| << "\n";); |
| return dataClauseOp; |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
| // Generates bounds for variables that have unknown dimensions |
| static void fillInBoundsForUnknownDimensions(Operation *dataClauseOp, |
| OpBuilder &builder) { |
| |
| if (!acc::getBounds(dataClauseOp).empty()) |
| // If bounds are already present, do not overwrite them. |
| return; |
| |
| // For types that have unknown dimensions, attempt to generate bounds by |
| // relying on MappableType being able to extract it from the IR. |
| auto var = acc::getVar(dataClauseOp); |
| auto type = var.getType(); |
| if (auto mappableTy = dyn_cast<acc::MappableType>(type)) { |
| if (mappableTy.hasUnknownDimensions()) { |
| TypeSwitch<Operation *>(dataClauseOp) |
| .Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>([&](auto dataClauseOp) { |
| if (std::is_same_v<decltype(dataClauseOp), acc::DevicePtrOp>) |
| return; |
| OpBuilder::InsertionGuard guard(builder); |
| builder.setInsertionPoint(dataClauseOp); |
| auto bounds = mappableTy.generateAccBounds(var, builder); |
| if (!bounds.empty()) |
| dataClauseOp.getBoundsMutable().assign(bounds); |
| }); |
| } |
| } |
| } |
| |
| acc::PrivateRecipeOp |
| ACCImplicitData::generatePrivateRecipe(ModuleOp &module, Value var, |
| Location loc, OpBuilder &builder, |
| acc::OpenACCSupport &accSupport) { |
| auto type = var.getType(); |
| std::string recipeName = |
| accSupport.getRecipeName(acc::RecipeKind::private_recipe, type, var); |
| |
| // Check if recipe already exists |
| auto existingRecipe = module.lookupSymbol<acc::PrivateRecipeOp>(recipeName); |
| if (existingRecipe) |
| return existingRecipe; |
| |
| // Set insertion point to module body in a scoped way |
| OpBuilder::InsertionGuard guard(builder); |
| builder.setInsertionPointToStart(module.getBody()); |
| |
| auto recipe = |
| acc::PrivateRecipeOp::createAndPopulate(builder, loc, recipeName, var); |
| if (!recipe.has_value()) |
| return accSupport.emitNYI(loc, "implicit private"), nullptr; |
| return recipe.value(); |
| } |
| |
| acc::FirstprivateRecipeOp |
| ACCImplicitData::generateFirstprivateRecipe(ModuleOp &module, Value var, |
| Location loc, OpBuilder &builder, |
| acc::OpenACCSupport &accSupport) { |
| auto type = var.getType(); |
| std::string recipeName = |
| accSupport.getRecipeName(acc::RecipeKind::firstprivate_recipe, type, var); |
| |
| // Check if recipe already exists |
| auto existingRecipe = |
| module.lookupSymbol<acc::FirstprivateRecipeOp>(recipeName); |
| if (existingRecipe) |
| return existingRecipe; |
| |
| // Set insertion point to module body in a scoped way |
| OpBuilder::InsertionGuard guard(builder); |
| builder.setInsertionPointToStart(module.getBody()); |
| |
| auto recipe = acc::FirstprivateRecipeOp::createAndPopulate(builder, loc, |
| recipeName, var); |
| if (!recipe.has_value()) |
| return accSupport.emitNYI(loc, "implicit firstprivate"), nullptr; |
| return recipe.value(); |
| } |
| |
| void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder, |
| Operation *computeConstructOp, |
| const SmallVector<Value> &newOperands) { |
| auto &accSupport = this->getAnalysis<acc::OpenACCSupport>(); |
| for (auto var : newOperands) { |
| auto loc{var.getLoc()}; |
| if (auto privateOp = var.getDefiningOp<acc::PrivateOp>()) { |
| auto recipe = generatePrivateRecipe( |
| module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); |
| if (recipe) |
| privateOp.setRecipeAttr( |
| SymbolRefAttr::get(module->getContext(), recipe.getSymName())); |
| } else if (auto firstprivateOp = var.getDefiningOp<acc::FirstprivateOp>()) { |
| auto recipe = generateFirstprivateRecipe( |
| module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); |
| if (recipe) |
| firstprivateOp.setRecipeAttr(SymbolRefAttr::get( |
| module->getContext(), recipe.getSymName().str())); |
| } else { |
| accSupport.emitNYI(var.getLoc(), "implicit reduction"); |
| } |
| } |
| } |
| |
| // Generates the data entry data op clause so that it adheres to OpenACC |
| // rules as follows (line numbers and specification from OpenACC 3.4): |
| // 1388 An aggregate variable will be treated as if it appears either: |
| // 1389 - In a present clause if there is a default(present) clause visible at |
| // the compute construct. |
| // 1391 - In a copy clause otherwise. |
| // 1392 A scalar variable will be treated as if it appears either: |
| // 1393 - In a copy clause if the compute construct is a kernels construct. |
| // 1394 - In a firstprivate clause otherwise. |
| template <typename OpT> |
| Operation *ACCImplicitData::generateDataClauseOpForCandidate( |
| Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, |
| const SmallVector<Value> &dominatingDataClauses, |
| const std::optional<acc::ClauseDefaultValue> &defaultClause) { |
| auto &accSupport = this->getAnalysis<acc::OpenACCSupport>(); |
| acc::VariableTypeCategory typeCategory = |
| acc::VariableTypeCategory::uncategorized; |
| if (auto mappableTy = dyn_cast<acc::MappableType>(var.getType())) { |
| typeCategory = mappableTy.getTypeCategory(var); |
| } else if (auto pointerLikeTy = |
| dyn_cast<acc::PointerLikeType>(var.getType())) { |
| typeCategory = pointerLikeTy.getPointeeTypeCategory( |
| cast<TypedValue<acc::PointerLikeType>>(var), |
| pointerLikeTy.getElementType()); |
| } |
| |
| bool isScalar = |
| acc::bitEnumContainsAny(typeCategory, acc::VariableTypeCategory::scalar); |
| bool isAnyAggregate = acc::bitEnumContainsAny( |
| typeCategory, acc::VariableTypeCategory::aggregate); |
| Location loc = computeConstructOp->getLoc(); |
| |
| if (acc::isDeviceValue(var)) { |
| // If the variable is device data, use deviceptr clause. |
| LLVM_DEBUG(llvm::dbgs() << "Using deviceptr clause because variable is " |
| "device data\n"); |
| return acc::DevicePtrOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var)); |
| } |
| |
| Operation *op = nullptr; |
| op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp, |
| dominatingDataClauses); |
| if (op) { |
| if (isa<acc::NoCreateOp>(op)) |
| return acc::NoCreateOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var), |
| acc::getBounds(op)); |
| |
| if (isa<acc::DevicePtrOp>(op)) |
| return acc::DevicePtrOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var), |
| acc::getBounds(op)); |
| |
| // The original data clause op is a PresentOp, CopyinOp, or CreateOp, |
| // hence guaranteed to be present. |
| return acc::PresentOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var), |
| acc::getBounds(op)); |
| } |
| |
| if (isScalar) { |
| if (enableImplicitReductionCopy && |
| acc::isOnlyUsedByReductionClauses(var, |
| computeConstructOp->getRegion(0))) { |
| auto copyinOp = |
| acc::CopyinOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var)); |
| copyinOp.setDataClause(acc::DataClause::acc_reduction); |
| return copyinOp.getOperation(); |
| } |
| if constexpr (std::is_same_v<OpT, acc::KernelsOp> || |
| std::is_same_v<OpT, acc::KernelEnvironmentOp>) { |
| // Scalars are implicit copyin in kernels construct. |
| // We also do the same for acc.kernel_environment because semantics |
| // of user variable mappings should be applied while ACC construct exists |
| // and at this point we should only be dealing with unmapped variables |
| // that were made live-in by the compiler. |
| // TODO: This may be revisited. |
| auto copyinOp = |
| acc::CopyinOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var)); |
| copyinOp.setDataClause(acc::DataClause::acc_copy); |
| return copyinOp.getOperation(); |
| } else { |
| // Scalars are implicit firstprivate in parallel and serial construct. |
| return acc::FirstprivateOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var)); |
| } |
| } else if (isAnyAggregate) { |
| Operation *newDataOp = nullptr; |
| |
| // When default(present) is true, the implicit behavior is present. |
| if (defaultClause.has_value() && |
| defaultClause.value() == acc::ClauseDefaultValue::Present) { |
| newDataOp = acc::PresentOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var)); |
| newDataOp->setAttr(acc::getFromDefaultClauseAttrName(), |
| builder.getUnitAttr()); |
| } else { |
| auto copyinOp = |
| acc::CopyinOp::create(builder, loc, var, |
| /*structured=*/true, /*implicit=*/true, |
| accSupport.getVariableName(var)); |
| copyinOp.setDataClause(acc::DataClause::acc_copy); |
| newDataOp = copyinOp.getOperation(); |
| } |
| |
| return newDataOp; |
| } else { |
| // This is not a fatal error - for example when the element type is |
| // pointer type (aka we have a pointer of pointer), it is potentially a |
| // deep copy scenario which is not being handled here. |
| // Other types need to be canonicalized. Thus just log unhandled cases. |
| LLVM_DEBUG(llvm::dbgs() |
| << "Unhandled case for implicit data mapping " << var << "\n"); |
| } |
| return nullptr; |
| } |
| |
| // Ensures that result values from the acc data clause ops are used inside the |
| // acc region. ie: |
| // acc.kernels { |
| // use %val |
| // } |
| // => |
| // %dev = acc.dataop %val |
| // acc.kernels { |
| // use %dev |
| // } |
| static void legalizeValuesInRegion(Region &accRegion, |
| SmallVector<Value> &newPrivateOperands, |
| SmallVector<Value> &newDataClauseOperands) { |
| for (Value dataClause : |
| llvm::concat<Value>(newDataClauseOperands, newPrivateOperands)) { |
| Value var = acc::getVar(dataClause.getDefiningOp()); |
| replaceAllUsesInRegionWith(var, dataClause, accRegion); |
| } |
| } |
| |
| // Adds the private operands to the compute construct operation. |
| template <typename OpT> |
| static void addNewPrivateOperands(OpT &accOp, |
| const SmallVector<Value> &privateOperands) { |
| if (privateOperands.empty()) |
| return; |
| |
| for (auto priv : privateOperands) { |
| if (isa<acc::PrivateOp>(priv.getDefiningOp())) { |
| accOp.getPrivateOperandsMutable().append(priv); |
| } else if (isa<acc::FirstprivateOp>(priv.getDefiningOp())) { |
| accOp.getFirstprivateOperandsMutable().append(priv); |
| } else { |
| llvm_unreachable("unhandled reduction operand"); |
| } |
| } |
| } |
| |
| static Operation *findDataExitOp(Operation *dataEntryOp) { |
| auto res = acc::getAccVar(dataEntryOp); |
| for (auto *user : res.getUsers()) |
| if (isa<ACC_DATA_EXIT_OPS>(user)) |
| return user; |
| return nullptr; |
| } |
| |
| // Generates matching data exit operation as described in the acc dialect |
| // for how data clauses are decomposed: |
| // https://mlir.llvm.org/docs/Dialects/OpenACCDialect/#operation-categories |
| // Key ones used here: |
| // * acc {construct} copy -> acc.copyin (before region) + acc.copyout (after |
| // region) |
| // * acc {construct} present -> acc.present (before region) + acc.delete |
| // (after region) |
| static void |
| generateDataExitOperations(OpBuilder &builder, Operation *accOp, |
| const SmallVector<Value> &newDataClauseOperands, |
| const SmallVector<Value> &sortedDataClauseOperands) { |
| builder.setInsertionPointAfter(accOp); |
| Value lastDataClause = nullptr; |
| for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) { |
| if (llvm::find(newDataClauseOperands, dataEntry) == |
| newDataClauseOperands.end()) { |
| // If this is not a new data clause operand, we should not generate an |
| // exit operation for it. |
| lastDataClause = dataEntry; |
| continue; |
| } |
| if (lastDataClause) |
| if (auto *dataExitOp = findDataExitOp(lastDataClause.getDefiningOp())) |
| builder.setInsertionPointAfter(dataExitOp); |
| Operation *dataEntryOp = dataEntry.getDefiningOp(); |
| if (isa<acc::CopyinOp>(dataEntryOp)) { |
| auto copyoutOp = acc::CopyoutOp::create( |
| builder, dataEntryOp->getLoc(), dataEntry, acc::getVar(dataEntryOp), |
| /*structured=*/true, /*implicit=*/true, |
| acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp)); |
| copyoutOp.setDataClause(acc::DataClause::acc_copy); |
| } else if (isa<acc::PresentOp, acc::NoCreateOp>(dataEntryOp)) { |
| auto deleteOp = acc::DeleteOp::create( |
| builder, dataEntryOp->getLoc(), dataEntry, |
| /*structured=*/true, /*implicit=*/true, |
| acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp)); |
| deleteOp.setDataClause(acc::getDataClause(dataEntryOp).value()); |
| } else if (isa<acc::DevicePtrOp>(dataEntryOp)) { |
| // Do nothing. |
| } else { |
| llvm_unreachable("unhandled data exit"); |
| } |
| lastDataClause = dataEntry; |
| } |
| } |
| |
| /// Returns all base references of a value in order. |
| /// So for example, if we have a reference to a struct field like |
| /// s.f1.f2.f3, this will return <s, s.f1, s.f1.f2, s.f1.f2.f3>. |
| /// Any intermediate casts/view-like operations are included in the |
| /// chain as well. |
| static SmallVector<Value> getBaseRefsChain(Value val) { |
| SmallVector<Value> baseRefs; |
| baseRefs.push_back(val); |
| while (true) { |
| Value prevVal = val; |
| |
| val = acc::getBaseEntity(val); |
| if (val != baseRefs.front()) |
| baseRefs.insert(baseRefs.begin(), val); |
| |
| // If this is a view-like operation, it is effectively another |
| // view of the same entity so we should add it to the chain also. |
| if (auto viewLikeOp = val.getDefiningOp<ViewLikeOpInterface>()) { |
| val = viewLikeOp.getViewSource(); |
| baseRefs.insert(baseRefs.begin(), val); |
| } |
| |
| // Continue loop if we made any progress |
| if (val == prevVal) |
| break; |
| } |
| |
| return baseRefs; |
| } |
| |
| static void insertInSortedOrder(SmallVector<Value> &sortedDataClauseOperands, |
| Operation *newClause) { |
| auto *insertPos = |
| std::find_if(sortedDataClauseOperands.begin(), |
| sortedDataClauseOperands.end(), [&](Value dataClauseVal) { |
| // Get the base refs for the current clause we are looking |
| // at. |
| auto var = acc::getVar(dataClauseVal.getDefiningOp()); |
| auto baseRefs = getBaseRefsChain(var); |
| |
| // If the newClause is of a base ref of an existing clause, |
| // we should insert it right before the current clause. |
| // Thus return true to stop iteration when this is the |
| // case. |
| return std::find(baseRefs.begin(), baseRefs.end(), |
| acc::getVar(newClause)) != baseRefs.end(); |
| }); |
| |
| if (insertPos != sortedDataClauseOperands.end()) { |
| newClause->moveBefore(insertPos->getDefiningOp()); |
| sortedDataClauseOperands.insert(insertPos, acc::getAccVar(newClause)); |
| } else { |
| sortedDataClauseOperands.push_back(acc::getAccVar(newClause)); |
| } |
| } |
| |
| template <typename OpT> |
| void ACCImplicitData::generateImplicitDataOps( |
| ModuleOp &module, OpT computeConstructOp, |
| std::optional<acc::ClauseDefaultValue> &defaultClause, |
| acc::OpenACCSupport &accSupport) { |
| // Implicit data attributes are only applied if "[t]here is no default(none) |
| // clause visible at the compute construct." |
| if (defaultClause.has_value() && |
| defaultClause.value() == acc::ClauseDefaultValue::None) |
| return; |
| assert(!defaultClause.has_value() || |
| defaultClause.value() == acc::ClauseDefaultValue::Present); |
| |
| // 1) Collect live-in values. |
| Region &accRegion = computeConstructOp->getRegion(0); |
| SetVector<Value> liveInValues; |
| getUsedValuesDefinedAbove(accRegion, liveInValues); |
| |
| // 2) Run the filtering to find relevant pointers that need copied. |
| auto isCandidate{[&](Value val) -> bool { |
| return isCandidateForImplicitData(val, accRegion, accSupport); |
| }}; |
| auto candidateVars(llvm::filter_to_vector(liveInValues, isCandidate)); |
| if (candidateVars.empty()) |
| return; |
| |
| // 3) Generate data clauses for the variables. |
| SmallVector<Value> newPrivateOperands; |
| SmallVector<Value> newDataClauseOperands; |
| OpBuilder builder(computeConstructOp); |
| if (!candidateVars.empty()) { |
| LLVM_DEBUG(llvm::dbgs() << "== Generating clauses for ==\n" |
| << computeConstructOp << "\n"); |
| } |
| auto &domInfo = this->getAnalysis<DominanceInfo>(); |
| auto &postDomInfo = this->getAnalysis<PostDominanceInfo>(); |
| auto dominatingDataClauses = |
| acc::getDominatingDataClauses(computeConstructOp, domInfo, postDomInfo); |
| for (auto var : candidateVars) { |
| auto newDataClauseOp = generateDataClauseOpForCandidate( |
| var, module, builder, computeConstructOp, dominatingDataClauses, |
| defaultClause); |
| fillInBoundsForUnknownDimensions(newDataClauseOp, builder); |
| LLVM_DEBUG(llvm::dbgs() << "Generated data clause for " << var << ":\n" |
| << "\t" << *newDataClauseOp << "\n"); |
| if (isa_and_nonnull<acc::PrivateOp, acc::FirstprivateOp, acc::ReductionOp>( |
| newDataClauseOp)) { |
| newPrivateOperands.push_back(acc::getAccVar(newDataClauseOp)); |
| } else if (isa_and_nonnull<ACC_DATA_CLAUSE_OPS>(newDataClauseOp)) { |
| newDataClauseOperands.push_back(acc::getAccVar(newDataClauseOp)); |
| dominatingDataClauses.push_back(acc::getAccVar(newDataClauseOp)); |
| } |
| } |
| |
| // 4) Legalize values in region (aka the uses in the region are the result |
| // of the data clause ops) |
| legalizeValuesInRegion(accRegion, newPrivateOperands, newDataClauseOperands); |
| |
| // 5) Generate private recipes which are required for properly attaching |
| // private operands. |
| if constexpr (!std::is_same_v<OpT, acc::KernelsOp> && |
| !std::is_same_v<OpT, acc::KernelEnvironmentOp>) |
| generateRecipes(module, builder, computeConstructOp, newPrivateOperands); |
| |
| // 6) Figure out insertion order for the new data clause operands. |
| SmallVector<Value> sortedDataClauseOperands( |
| computeConstructOp.getDataClauseOperands()); |
| for (auto newClause : newDataClauseOperands) |
| insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp()); |
| |
| // 7) Generate the data exit operations. |
| generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands, |
| sortedDataClauseOperands); |
| // 8) Add all of the new operands to the compute construct op. |
| if constexpr (!std::is_same_v<OpT, acc::KernelsOp> && |
| !std::is_same_v<OpT, acc::KernelEnvironmentOp>) |
| addNewPrivateOperands(computeConstructOp, newPrivateOperands); |
| computeConstructOp.getDataClauseOperandsMutable().assign( |
| sortedDataClauseOperands); |
| } |
| |
| void ACCImplicitData::runOnOperation() { |
| ModuleOp module = this->getOperation(); |
| |
| acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>(); |
| |
| module.walk([&](Operation *op) { |
| if (isa<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(op)) { |
| assert(op->getNumRegions() == 1 && "must have 1 region"); |
| |
| auto defaultClause = acc::getDefaultAttr(op); |
| llvm::TypeSwitch<Operation *, void>(op) |
| .Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>( |
| [&](auto op) { |
| generateImplicitDataOps(module, op, defaultClause, accSupport); |
| }) |
| .Default([&](Operation *) {}); |
| } |
| }); |
| } |
| |
| } // namespace |