[flang][cuda] Do inline allocation/deallocation in device code (#106628)
ALLOCATE and DEALLOCATE statements can be inlined in device functions.
This patch updates the condition that determines whether to inline these
actions in lowering.
This avoids runtime calls in device function code and can speed up
execution.
Also move `isCudaDeviceContext` from `Bridge.cpp` so it can be used
elsewhere.
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 078e17b..90943fa 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -20,6 +20,7 @@
#include "flang/Lower/ConvertExprToHLFIR.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/ConvertVariable.h"
+#include "flang/Lower/Cuda.h"
#include "flang/Lower/HostAssociations.h"
#include "flang/Lower/IO.h"
#include "flang/Lower/IterationSpace.h"
@@ -4377,36 +4378,13 @@
return temps;
}
- // Check if the insertion point is currently in a device context. HostDevice
- // subprogram are not considered fully device context so it will return false
- // for it.
- // If the insertion point is inside an OpenACC region op, it is considered
- // device context.
- static bool isCudaDeviceContext(fir::FirOpBuilder &builder) {
- if (builder.getRegion().getParentOfType<cuf::KernelOp>())
- return true;
- if (builder.getRegion()
- .getParentOfType<mlir::acc::ComputeRegionOpInterface>())
- return true;
- if (auto funcOp =
- builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
- if (auto cudaProcAttr =
- funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
- cuf::getProcAttrName())) {
- return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
- cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
- }
- }
- return false;
- }
-
void genDataAssignment(
const Fortran::evaluate::Assignment &assign,
const Fortran::evaluate::ProcedureRef *userDefinedAssignment) {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
- bool isInDeviceContext = isCudaDeviceContext(builder);
+ bool isInDeviceContext = Fortran::lower::isCudaDeviceContext(builder);
bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&