| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Helpers to emit OpenACC clause recipes as CIR code. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include <numeric> |
| |
| #include "CIRGenOpenACCRecipe.h" |
| |
| namespace clang::CIRGen { |
| mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region ®ion, |
| mlir::Type opTy, |
| mlir::Location loc, |
| size_t numBounds, |
| bool isInit) { |
| llvm::SmallVector<mlir::Type> types; |
| types.reserve(numBounds + 2); |
| types.push_back(opTy); |
| // The init section is the only one that doesn't have TWO copies of the |
| // operation-type. Copy has a to/from, and destroy has a |
| // 'reference'/'privatized' copy version. |
| if (!isInit) |
| types.push_back(opTy); |
| |
| auto boundsTy = mlir::acc::DataBoundsType::get(&cgf.getMLIRContext()); |
| for (size_t i = 0; i < numBounds; ++i) |
| types.push_back(boundsTy); |
| |
| llvm::SmallVector<mlir::Location> locs{types.size(), loc}; |
| return builder.createBlock(®ion, region.end(), types, locs); |
| } |
| void OpenACCRecipeBuilderBase::makeAllocaCopy(mlir::Location loc, |
| mlir::Type copyType, |
| mlir::Value numEltsToCopy, |
| mlir::Value offsetPerSubarray, |
| mlir::Value destAlloca, |
| mlir::Value srcAlloca) { |
| mlir::OpBuilder::InsertionGuard guardCase(builder); |
| |
| mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); |
| auto itrPtrTy = cir::PointerType::get(itrTy); |
| mlir::IntegerAttr itrAlign = |
| cgf.cgm.getSize(cgf.getContext().getTypeAlignInChars( |
| cgf.getContext().UnsignedLongLongTy)); |
| |
| auto loopBuilder = [&]() { |
| auto itr = |
| cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", itrAlign); |
| cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0); |
| builder.CIRBaseBuilderTy::createStore(loc, constZero, itr); |
| builder.createFor( |
| loc, |
| /*condBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| // itr < numEltsToCopy |
| // Enforce a trip count of 1 if there wasn't any element count, this |
| // way we can just use this loop with a constant bounds instead of a |
| // separate code path. |
| if (!numEltsToCopy) |
| numEltsToCopy = builder.getConstInt(loc, itrTy, 1); |
| |
| auto loadCur = cir::LoadOp::create(builder, loc, {itr}); |
| auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadCur, |
| numEltsToCopy); |
| builder.createCondition(cmp); |
| }, |
| /*bodyBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| // destAlloca[itr] = srcAlloca[offsetPerSubArray * itr]; |
| auto loadCur = cir::LoadOp::create(builder, loc, {itr}); |
| auto srcOffset = builder.createMul(loc, offsetPerSubarray, loadCur); |
| |
| auto ptrToOffsetIntoSrc = cir::PtrStrideOp::create( |
| builder, loc, copyType, srcAlloca, srcOffset); |
| |
| auto offsetIntoDecayDest = cir::PtrStrideOp::create( |
| builder, loc, builder.getPointerTo(copyType), destAlloca, |
| loadCur); |
| |
| builder.CIRBaseBuilderTy::createStore(loc, ptrToOffsetIntoSrc, |
| offsetIntoDecayDest); |
| builder.createYield(loc); |
| }, |
| /*stepBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| // Simple increment of the iterator. |
| auto load = cir::LoadOp::create(builder, loc, {itr}); |
| auto inc = cir::UnaryOp::create(builder, loc, load.getType(), |
| cir::UnaryOpKind::Inc, load); |
| builder.CIRBaseBuilderTy::createStore(loc, inc, itr); |
| builder.createYield(loc); |
| }); |
| }; |
| |
| cir::ScopeOp::create(builder, loc, |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| loopBuilder(); |
| builder.createYield(loc); |
| }); |
| } |
| |
| mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca( |
| mlir::Block *block, SourceRange exprRange, mlir::Location loc, |
| std::string_view allocaName, size_t numBounds, |
| llvm::ArrayRef<QualType> boundTypes) { |
| mlir::OpBuilder::InsertionGuard guardCase(builder); |
| |
| // Get the range of bounds arguments, which are all but the 1st arg. |
| llvm::ArrayRef<mlir::BlockArgument> boundsRange = |
| block->getArguments().drop_front(1); |
| |
| // boundTypes contains the before and after of each bounds, so it ends up |
| // having 1 extra. Assert this is the case to ensure we don't call this in the |
| // wrong 'block'. |
| assert(boundsRange.size() + 1 == boundTypes.size()); |
| |
| mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); |
| auto idxType = mlir::IndexType::get(&cgf.getMLIRContext()); |
| |
| auto getUpperBound = [&](mlir::Value bound) { |
| auto upperBoundVal = |
| mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound); |
| return mlir::UnrealizedConversionCastOp::create(builder, loc, itrTy, |
| upperBoundVal.getResult()) |
| .getResult(0); |
| }; |
| |
| auto isArrayTy = [&](QualType ty) { |
| if (ty->isArrayType() && !ty->isConstantArrayType()) |
| cgf.cgm.errorNYI(exprRange, "OpenACC recipe init for VLAs"); |
| return ty->isConstantArrayType(); |
| }; |
| |
| mlir::Type topLevelTy = cgf.convertType(boundTypes.back()); |
| cir::PointerType topLevelTyPtr = builder.getPointerTo(topLevelTy); |
| // Do an alloca for the 'top' level type without bounds. |
| mlir::Value initialAlloca = builder.createAlloca( |
| loc, topLevelTyPtr, topLevelTy, allocaName, |
| cgf.getContext().getTypeAlignInChars(boundTypes.back())); |
| |
| bool lastBoundWasArray = isArrayTy(boundTypes.back()); |
| |
| // Make sure we track a moving version of this so we can get our |
| // 'copying' back to correct. |
| mlir::Value lastAlloca = initialAlloca; |
| |
| // Since we're iterating the types in reverse, this sets up for each index |
| // corresponding to the boundsRange to be the 'after application of the |
| // bounds. |
| llvm::ArrayRef<QualType> boundResults = boundTypes.drop_back(1); |
| |
| // Collect the 'do we have any allocas needed after this type' list. |
| llvm::SmallVector<bool> allocasLeftArr; |
| llvm::ArrayRef<QualType> resultTypes = boundTypes.drop_front(); |
| std::transform_inclusive_scan( |
| resultTypes.begin(), resultTypes.end(), |
| std::back_inserter(allocasLeftArr), std::plus<bool>{}, |
| [](QualType ty) { return !ty->isConstantArrayType(); }, false); |
| |
| // Keep track of the number of 'elements' that we're allocating. Individual |
| // allocas should multiply this by the size of its current allocation. |
| mlir::Value cumulativeElts; |
| for (auto [bound, resultType, allocasLeft] : llvm::reverse( |
| llvm::zip_equal(boundsRange, boundResults, allocasLeftArr))) { |
| |
| // if there is no further 'alloca' operation we need to do, we can skip |
| // creating the UB/multiplications/etc. |
| if (!allocasLeft) |
| break; |
| |
| // First: figure out the number of elements in the current 'bound' list. |
| mlir::Value eltsPerSubArray = getUpperBound(bound); |
| mlir::Value eltsToAlloca; |
| |
| // IF we are in a sub-bounds, the total number of elements to alloca is |
| // the product of that one and the current 'bounds' size. That is, |
| // arr[5][5], we would need 25 elements, not just 5. Else it is just the |
| // current number of elements. |
| if (cumulativeElts) |
| eltsToAlloca = builder.createMul(loc, eltsPerSubArray, cumulativeElts); |
| else |
| eltsToAlloca = eltsPerSubArray; |
| |
| if (!lastBoundWasArray) { |
| // If we have to do an allocation, figure out the size of the |
| // allocation. alloca takes the number of bytes, not elements. |
| TypeInfoChars eltInfo = cgf.getContext().getTypeInfoInChars(resultType); |
| cir::ConstantOp eltSize = builder.getConstInt( |
| loc, itrTy, eltInfo.Width.alignTo(eltInfo.Align).getQuantity()); |
| mlir::Value curSize = builder.createMul(loc, eltsToAlloca, eltSize); |
| |
| mlir::Type eltTy = cgf.convertType(resultType); |
| cir::PointerType ptrTy = builder.getPointerTo(eltTy); |
| mlir::Value curAlloca = builder.createAlloca( |
| loc, ptrTy, eltTy, "openacc.init.bounds", |
| cgf.getContext().getTypeAlignInChars(resultType), curSize); |
| |
| makeAllocaCopy(loc, ptrTy, cumulativeElts, eltsPerSubArray, lastAlloca, |
| curAlloca); |
| lastAlloca = curAlloca; |
| } else { |
| // In the case of an array, we just need to decay the pointer, so just do |
| // a zero-offset stride on the last alloca to decay it down an array |
| // level. |
| cir::ConstantOp constZero = builder.getConstInt(loc, itrTy, 0); |
| lastAlloca = builder.getArrayElement(loc, loc, lastAlloca, |
| cgf.convertType(resultType), |
| constZero, /*shouldDecay=*/true); |
| } |
| |
| cumulativeElts = eltsToAlloca; |
| lastBoundWasArray = isArrayTy(resultType); |
| } |
| return initialAlloca; |
| } |
| |
| std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop( |
| mlir::Value subscriptedValue, mlir::Value subscriptedValue2, |
| mlir::Value bound, mlir::Location loc, bool inverse) { |
| mlir::Operation *bodyInsertLoc; |
| |
| mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); |
| auto itrPtrTy = cir::PointerType::get(itrTy); |
| mlir::IntegerAttr itrAlign = |
| cgf.cgm.getSize(cgf.getContext().getTypeAlignInChars( |
| cgf.getContext().UnsignedLongLongTy)); |
| auto idxType = mlir::IndexType::get(&cgf.getMLIRContext()); |
| |
| auto doSubscriptOp = [&](mlir::Value subVal, |
| cir::LoadOp idxLoad) -> mlir::Value { |
| auto eltTy = cast<cir::PointerType>(subVal.getType()).getPointee(); |
| |
| if (auto arrayTy = dyn_cast<cir::ArrayType>(eltTy)) |
| return builder.getArrayElement(loc, loc, subVal, arrayTy.getElementType(), |
| idxLoad, |
| /*shouldDecay=*/true); |
| |
| assert(isa<cir::PointerType>(eltTy)); |
| |
| auto eltLoad = cir::LoadOp::create(builder, loc, {subVal}); |
| |
| return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad, |
| idxLoad); |
| }; |
| |
| auto forStmtBuilder = [&]() { |
| // get the lower and upper bound for iterating over. |
| auto lowerBoundVal = |
| mlir::acc::GetLowerboundOp::create(builder, loc, idxType, bound); |
| auto lbConversion = mlir::UnrealizedConversionCastOp::create( |
| builder, loc, itrTy, lowerBoundVal.getResult()); |
| auto upperBoundVal = |
| mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound); |
| auto ubConversion = mlir::UnrealizedConversionCastOp::create( |
| builder, loc, itrTy, upperBoundVal.getResult()); |
| |
| // Create a memory location for the iterator. |
| auto itr = |
| cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "iter", itrAlign); |
| // Store to the iterator: either lower bound, or if inverse loop, upper |
| // bound. |
| if (inverse) { |
| cir::ConstantOp constOne = builder.getConstInt(loc, itrTy, 1); |
| |
| auto sub = cir::BinOp::create(builder, loc, itrTy, cir::BinOpKind::Sub, |
| ubConversion.getResult(0), constOne); |
| |
| // Upperbound is exclusive, so subtract 1. |
| builder.CIRBaseBuilderTy::createStore(loc, sub, itr); |
| } else { |
| // Lowerbound is inclusive, so we can include it. |
| builder.CIRBaseBuilderTy::createStore(loc, lbConversion.getResult(0), |
| itr); |
| } |
| // Save the 'end' iterator based on whether we are inverted or not. This |
| // end iterator never changes, so we can just get it and convert it, so no |
| // need to store/load/etc. |
| auto endItr = inverse ? lbConversion : ubConversion; |
| |
| builder.createFor( |
| loc, |
| /*condBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| auto loadCur = cir::LoadOp::create(builder, loc, {itr}); |
| // Use 'not equal' since we are just doing an increment/decrement. |
| auto cmp = builder.createCompare( |
| loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt, loadCur, |
| endItr.getResult(0)); |
| builder.createCondition(cmp); |
| }, |
| /*bodyBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| auto load = cir::LoadOp::create(builder, loc, {itr}); |
| |
| if (subscriptedValue) |
| subscriptedValue = doSubscriptOp(subscriptedValue, load); |
| if (subscriptedValue2) |
| subscriptedValue2 = doSubscriptOp(subscriptedValue2, load); |
| bodyInsertLoc = builder.createYield(loc); |
| }, |
| /*stepBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| auto load = cir::LoadOp::create(builder, loc, {itr}); |
| auto unary = cir::UnaryOp::create( |
| builder, loc, load.getType(), |
| inverse ? cir::UnaryOpKind::Dec : cir::UnaryOpKind::Inc, load); |
| builder.CIRBaseBuilderTy::createStore(loc, unary, itr); |
| builder.createYield(loc); |
| }); |
| }; |
| |
| cir::ScopeOp::create(builder, loc, |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| forStmtBuilder(); |
| builder.createYield(loc); |
| }); |
| |
| // Leave the insertion point to be inside the body, so we can loop over |
| // these things. |
| builder.setInsertionPoint(bodyInsertLoc); |
| return {subscriptedValue, subscriptedValue2}; |
| } |
| |
| mlir::acc::ReductionOperator |
| OpenACCRecipeBuilderBase::convertReductionOp(OpenACCReductionOperator op) { |
| switch (op) { |
| case OpenACCReductionOperator::Addition: |
| return mlir::acc::ReductionOperator::AccAdd; |
| case OpenACCReductionOperator::Multiplication: |
| return mlir::acc::ReductionOperator::AccMul; |
| case OpenACCReductionOperator::Max: |
| return mlir::acc::ReductionOperator::AccMax; |
| case OpenACCReductionOperator::Min: |
| return mlir::acc::ReductionOperator::AccMin; |
| case OpenACCReductionOperator::BitwiseAnd: |
| return mlir::acc::ReductionOperator::AccIand; |
| case OpenACCReductionOperator::BitwiseOr: |
| return mlir::acc::ReductionOperator::AccIor; |
| case OpenACCReductionOperator::BitwiseXOr: |
| return mlir::acc::ReductionOperator::AccXor; |
| case OpenACCReductionOperator::And: |
| return mlir::acc::ReductionOperator::AccLand; |
| case OpenACCReductionOperator::Or: |
| return mlir::acc::ReductionOperator::AccLor; |
| case OpenACCReductionOperator::Invalid: |
| llvm_unreachable("invalid reduction operator"); |
| } |
| |
| llvm_unreachable("invalid reduction operator"); |
| } |
| |
| // This function generates the 'destroy' section for a recipe. Note |
| // that this function is not 'insertion point' clean, in that it alters the |
| // insertion point to be inside of the 'destroy' section of the recipe, but |
| // doesn't restore it aftewards. |
| void OpenACCRecipeBuilderBase::createRecipeDestroySection( |
| mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, |
| CharUnits alignment, QualType origType, size_t numBounds, QualType baseType, |
| mlir::Region &destroyRegion) { |
| mlir::Block *block = createRecipeBlock(destroyRegion, mainOp.getType(), loc, |
| numBounds, /*isInit=*/false); |
| builder.setInsertionPointToEnd(&destroyRegion.back()); |
| CIRGenFunction::LexicalScope ls(cgf, loc, block); |
| |
| mlir::Type elementTy = |
| mlir::cast<cir::PointerType>(mainOp.getType()).getPointee(); |
| auto emitDestroy = [&](mlir::Value var, mlir::Type ty) { |
| Address addr{var, ty, alignment}; |
| cgf.emitDestroy(addr, origType, |
| cgf.getDestroyer(QualType::DK_cxx_destructor)); |
| }; |
| |
| if (numBounds) { |
| mlir::OpBuilder::InsertionGuard guardCase(builder); |
| // Get the range of bounds arguments, which are all but the 1st 2. 1st is |
| // a 'reference', 2nd is the 'private' variant we need to destroy from. |
| llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = |
| block->getArguments().drop_front(2); |
| |
| mlir::Value subscriptedValue = block->getArgument(1); |
| for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) |
| subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc, |
| /*inverse=*/true); |
| |
| emitDestroy(subscriptedValue, cgf.cgm.convertType(origType)); |
| } else { |
| // If we don't have any bounds, we can just destroy the variable directly. |
| // The destroy region has a signature of "original item, privatized item". |
| // So the 2nd item is the one that needs destroying, the former is just |
| // for reference and we don't really have a need for it at the moment. |
| emitDestroy(block->getArgument(1), elementTy); |
| } |
| |
| ls.forceCleanup(); |
| mlir::acc::YieldOp::create(builder, locEnd); |
| } |
| void OpenACCRecipeBuilderBase::makeBoundsInit( |
| mlir::Value alloca, mlir::Location loc, mlir::Block *block, |
| const VarDecl *allocaDecl, QualType origType, bool isInitSection) { |
| mlir::OpBuilder::InsertionGuard guardCase(builder); |
| builder.setInsertionPointToEnd(block); |
| CIRGenFunction::LexicalScope ls(cgf, loc, block); |
| |
| CIRGenFunction::AutoVarEmission tempDeclEmission{*allocaDecl}; |
| tempDeclEmission.emittedAsOffload = true; |
| |
| // The init section is the only one of the handful that only has a single |
| // argument for the 'type', so we have to drop 1 for init, and future calls |
| // to this will need to drop 2. |
| llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = |
| block->getArguments().drop_front(isInitSection ? 1 : 2); |
| |
| mlir::Value subscriptedValue = alloca; |
| for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) |
| subscriptedValue = createBoundsLoop(subscriptedValue, boundArg, loc, |
| /*inverse=*/false); |
| |
| tempDeclEmission.setAllocatedAddress( |
| Address{subscriptedValue, cgf.convertType(origType), |
| cgf.getContext().getDeclAlign(allocaDecl)}); |
| cgf.emitAutoVarInit(tempDeclEmission); |
| } |
| |
| // TODO: OpenACC: when we start doing firstprivate for array/vlas/etc, we |
| // probably need to do a little work about the 'init' calls to put it in 'copy' |
| // region instead. |
| void OpenACCRecipeBuilderBase::createInitRecipe( |
| mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, |
| mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, |
| llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl, |
| QualType origType, bool emitInitExpr) { |
| assert(allocaDecl && "Required recipe variable not set?"); |
| CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl}; |
| |
| mlir::Block *block = createRecipeBlock(recipeInitRegion, mainOp.getType(), |
| loc, numBounds, /*isInit=*/true); |
| builder.setInsertionPointToEnd(&recipeInitRegion.back()); |
| CIRGenFunction::LexicalScope ls(cgf, loc, block); |
| |
| const Type *allocaPointeeType = |
| allocaDecl->getType()->getPointeeOrArrayElementType(); |
| // We are OK with no init for builtins, arrays of builtins, or pointers, |
| // else we should NYI so we know to go look for these. |
| if (cgf.getContext().getLangOpts().CPlusPlus && !allocaDecl->getInit() && |
| !allocaDecl->getType()->isPointerType() && |
| !allocaPointeeType->isBuiltinType() && |
| !allocaPointeeType->isPointerType()) { |
| // If we don't have any initialization recipe, we failed during Sema to |
| // initialize this correctly. If we disable the |
| // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll |
| // emit an error to tell us. However, emitting those errors during |
| // production is a violation of the standard, so we cannot do them. |
| cgf.cgm.errorNYI(exprRange, "private/reduction default-init recipe"); |
| } |
| |
| if (!numBounds) { |
| // This is an 'easy' case, we just have to use the builtin init stuff to |
| // initialize this variable correctly. |
| CIRGenFunction::AutoVarEmission tempDeclEmission = |
| cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint()); |
| if (emitInitExpr) |
| cgf.emitAutoVarInit(tempDeclEmission); |
| } else { |
| mlir::Value alloca = makeBoundsAlloca( |
| block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes); |
| |
| // If the initializer is trivial, there is nothing to do here, so save |
| // ourselves some effort. |
| if (emitInitExpr && allocaDecl->getInit() && |
| (!cgf.isTrivialInitializer(allocaDecl->getInit()) || |
| cgf.getContext().getLangOpts().getTrivialAutoVarInit() != |
| LangOptions::TrivialAutoVarInitKind::Uninitialized)) |
| makeBoundsInit(alloca, loc, block, allocaDecl, origType, |
| /*isInitSection=*/true); |
| } |
| |
| ls.forceCleanup(); |
| mlir::acc::YieldOp::create(builder, locEnd); |
| } |
| |
| void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( |
| mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, |
| const VarDecl *allocaDecl, const VarDecl *temporary, |
| mlir::Region ©Region, size_t numBounds) { |
| mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc, |
| numBounds, /*isInit=*/false); |
| builder.setInsertionPointToEnd(©Region.back()); |
| CIRGenFunction::LexicalScope ls(cgf, loc, block); |
| |
| mlir::Value fromArg = block->getArgument(0); |
| mlir::Value toArg = block->getArgument(1); |
| |
| llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = |
| block->getArguments().drop_front(2); |
| |
| for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) |
| std::tie(fromArg, toArg) = |
| createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false); |
| |
| // Set up the 'to' address. |
| mlir::Type elementTy = |
| mlir::cast<cir::PointerType>(toArg.getType()).getPointee(); |
| CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl); |
| tempDeclEmission.emittedAsOffload = true; |
| tempDeclEmission.setAllocatedAddress( |
| Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); |
| |
| // Set up the 'from' address from the temporary. |
| CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary}; |
| cgf.setAddrOfLocalVar( |
| temporary, |
| Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); |
| cgf.emitAutoVarInit(tempDeclEmission); |
| |
| builder.setInsertionPointToEnd(©Region.back()); |
| ls.forceCleanup(); |
| mlir::acc::YieldOp::create(builder, locEnd); |
| } |
| |
| // This function generates the 'combiner' section for a reduction recipe. Note |
| // that this function is not 'insertion point' clean, in that it alters the |
| // insertion point to be inside of the 'combiner' section of the recipe, but |
| // doesn't restore it aftewards. |
| void OpenACCRecipeBuilderBase::createReductionRecipeCombiner( |
| mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, |
| mlir::acc::ReductionRecipeOp recipe, size_t numBounds, QualType origType, |
| llvm::ArrayRef<OpenACCReductionRecipe::CombinerRecipe> combinerRecipes) { |
| mlir::Block *block = |
| createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc, |
| numBounds, /*isInit=*/false); |
| builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); |
| CIRGenFunction::LexicalScope ls(cgf, loc, block); |
| |
| mlir::Value lhsArg = block->getArgument(0); |
| mlir::Value rhsArg = block->getArgument(1); |
| llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = |
| block->getArguments().drop_front(2); |
| |
| if (llvm::any_of(combinerRecipes, [](auto &r) { return r.Op == nullptr; })) { |
| cgf.cgm.errorNYI(loc, "OpenACC Reduction combiner not generated"); |
| mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); |
| return; |
| } |
| |
| // apply the bounds so that we can get our bounds emitted correctly. |
| for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) |
| std::tie(lhsArg, rhsArg) = |
| createBoundsLoop(lhsArg, rhsArg, boundArg, loc, /*inverse=*/false); |
| |
| // Emitter for when we know this isn't a struct or array we have to loop |
| // through. This should work for the 'field' once the get-element call has |
| // been made. |
| auto emitSingleCombiner = |
| [&](mlir::Value lhsArg, mlir::Value rhsArg, |
| const OpenACCReductionRecipe::CombinerRecipe &combiner) { |
| mlir::Type elementTy = |
| mlir::cast<cir::PointerType>(lhsArg.getType()).getPointee(); |
| CIRGenFunction::DeclMapRevertingRAII declMapRAIILhs{cgf, combiner.LHS}; |
| cgf.setAddrOfLocalVar( |
| combiner.LHS, Address{lhsArg, elementTy, |
| cgf.getContext().getDeclAlign(combiner.LHS)}); |
| CIRGenFunction::DeclMapRevertingRAII declMapRAIIRhs{cgf, combiner.RHS}; |
| cgf.setAddrOfLocalVar( |
| combiner.RHS, Address{rhsArg, elementTy, |
| cgf.getContext().getDeclAlign(combiner.RHS)}); |
| |
| [[maybe_unused]] mlir::LogicalResult stmtRes = |
| cgf.emitStmt(combiner.Op, /*useCurrentScope=*/true); |
| }; |
| |
| // Emitter for when we know this is either a non-array or element of an array |
| // (which also shouldn't be an array type?). This function should generate the |
| // initialization code for an entire 'array-element'/non-array, including |
| // diving into each element of a struct (if necessary). |
| auto emitCombiner = [&](mlir::Value lhsArg, mlir::Value rhsArg, QualType ty) { |
| assert(!ty->isArrayType() && "Array type shouldn't get here"); |
| if (const auto *rd = ty->getAsRecordDecl()) { |
| if (combinerRecipes.size() == 1 && |
| cgf.getContext().hasSameType(ty, combinerRecipes[0].LHS->getType())) { |
| // If this is a 'top level' operator on the type we can just emit this |
| // as a simple one. |
| emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); |
| } else { |
| // else we have to handle each individual field after after a |
| // get-element. |
| const CIRGenRecordLayout &layout = |
| cgf.cgm.getTypes().getCIRGenRecordLayout(rd); |
| for (const auto &[field, combiner] : |
| llvm::zip_equal(rd->fields(), combinerRecipes)) { |
| mlir::Type fieldType = cgf.convertType(field->getType()); |
| auto fieldPtr = cir::PointerType::get(fieldType); |
| unsigned fieldIndex = layout.getCIRFieldNo(field); |
| |
| mlir::Value lhsField = builder.createGetMember( |
| loc, fieldPtr, lhsArg, field->getName(), fieldIndex); |
| mlir::Value rhsField = builder.createGetMember( |
| loc, fieldPtr, rhsArg, field->getName(), fieldIndex); |
| |
| emitSingleCombiner(lhsField, rhsField, combiner); |
| } |
| } |
| |
| } else { |
| // if this is a single-thing (because we should know this isn't an array, |
| // as Sema wouldn't let us get here), we can just do a normal emit call. |
| emitSingleCombiner(lhsArg, rhsArg, combinerRecipes[0]); |
| } |
| }; |
| |
| if (const auto *cat = cgf.getContext().getAsConstantArrayType(origType)) { |
| // If we're in an array, we have to emit the combiner for each element of |
| // the array. |
| auto itrTy = mlir::cast<cir::IntType>(cgf.ptrDiffTy); |
| auto itrPtrTy = cir::PointerType::get(itrTy); |
| |
| mlir::Value zero = |
| builder.getConstInt(loc, mlir::cast<cir::IntType>(cgf.ptrDiffTy), 0); |
| mlir::Value itr = |
| cir::AllocaOp::create(builder, loc, itrPtrTy, itrTy, "itr", |
| cgf.cgm.getSize(cgf.getPointerAlign())); |
| builder.CIRBaseBuilderTy::createStore(loc, zero, itr); |
| |
| builder.setInsertionPointAfter(builder.createFor( |
| loc, |
| /*condBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| auto loadItr = cir::LoadOp::create(builder, loc, {itr}); |
| mlir::Value arraySize = builder.getConstInt( |
| loc, mlir::cast<cir::IntType>(cgf.ptrDiffTy), cat->getZExtSize()); |
| auto cmp = builder.createCompare(loc, cir::CmpOpKind::lt, loadItr, |
| arraySize); |
| builder.createCondition(cmp); |
| }, |
| /*bodyBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| auto loadItr = cir::LoadOp::create(builder, loc, {itr}); |
| auto lhsElt = builder.getArrayElement( |
| loc, loc, lhsArg, cgf.convertType(cat->getElementType()), loadItr, |
| /*shouldDecay=*/true); |
| auto rhsElt = builder.getArrayElement( |
| loc, loc, rhsArg, cgf.convertType(cat->getElementType()), loadItr, |
| /*shouldDecay=*/true); |
| |
| emitCombiner(lhsElt, rhsElt, cat->getElementType()); |
| builder.createYield(loc); |
| }, |
| /*stepBuilder=*/ |
| [&](mlir::OpBuilder &b, mlir::Location loc) { |
| auto loadItr = cir::LoadOp::create(builder, loc, {itr}); |
| auto inc = cir::UnaryOp::create(builder, loc, loadItr.getType(), |
| cir::UnaryOpKind::Inc, loadItr); |
| builder.CIRBaseBuilderTy::createStore(loc, inc, itr); |
| builder.createYield(loc); |
| })); |
| |
| } else if (origType->isArrayType()) { |
| cgf.cgm.errorNYI(loc, |
| "OpenACC Reduction combiner non-constant array recipe"); |
| } else { |
| emitCombiner(lhsArg, rhsArg, origType); |
| } |
| |
| builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); |
| ls.forceCleanup(); |
| mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0)); |
| } |
| |
| } // namespace clang::CIRGen |