[flang][OpenMP] Initialize allocatable members of derived types (#120295)
Allocatable members of privatized derived types must be allocated,
with the same bounds as the original object, whenever that member
is also allocated in it, but Flang was not performing such
initialization.
The `Initialize` runtime function can't perform this task unless
its signature is changed to receive an additional parameter, the
original object, that is needed to find out which allocatable
members, with their bounds, must also be allocated in the clone.
As `Initialize` is used not only for privatization, sometimes this
other object won't even exist, so this new parameter would need
to be optional.
Because of this, it seemed better to add a new runtime function:
`InitializeClone`.
To avoid unnecessary calls, lowering inserts a call to it only for
privatized items that are derived types with allocatable members.
Fixes https://github.com/llvm/llvm-project/issues/114888
Fixes https://github.com/llvm/llvm-project/issues/114889
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 307ba6a..8f026ac 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -89,6 +89,9 @@
virtual mlir::Value getSymbolAddress(SymbolRef sym) = 0;
virtual fir::ExtendedValue
+ symBoxToExtendedValue(const Fortran::lower::SymbolBox &symBox) = 0;
+
+ virtual fir::ExtendedValue
getSymbolExtendedValue(const Fortran::semantics::Symbol &sym,
Fortran::lower::SymMap *symMap = nullptr) = 0;
diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index de394a3..b9d7f89 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -70,6 +70,11 @@
const Fortran::semantics::Symbol &sym,
Fortran::lower::SymMap &symMap);
+/// Call clone initialization runtime routine to initialize \p sym's value.
+void initializeCloneAtRuntime(Fortran::lower::AbstractConverter &converter,
+ const Fortran::semantics::Symbol &sym,
+ Fortran::lower::SymMap &symMap);
+
/// Create a fir::GlobalOp given a module variable definition. This is intended
/// to be used when lowering a module definition, not when lowering variables
/// used from a module. For used variables instantiateVariable must directly be
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Derived.h b/flang/include/flang/Optimizer/Builder/Runtime/Derived.h
index d8b06f3..21a9a56 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Derived.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Derived.h
@@ -26,6 +26,12 @@
void genDerivedTypeInitialize(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value box);
+/// Generate call to derived type clone initialization runtime routine to
+/// initialize \p newBox from \p box.
+void genDerivedTypeInitializeClone(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value newBox,
+ mlir::Value box);
+
/// Generate call to derived type destruction runtime routine to
/// destroy \p box.
void genDerivedTypeDestroy(fir::FirOpBuilder &builder, mlir::Location loc,
diff --git a/flang/include/flang/Runtime/derived-api.h b/flang/include/flang/Runtime/derived-api.h
index 79aa7d8..96374c5 100644
--- a/flang/include/flang/Runtime/derived-api.h
+++ b/flang/include/flang/Runtime/derived-api.h
@@ -32,6 +32,13 @@
void RTDECL(Initialize)(
const Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);
+// Initializes an object clone from the original object.
+// Each allocatable member of the clone is allocated with the same bounds as
+// in the original object, if it is also allocated in it.
+// The descriptor must be initialized and non-null.
+void RTDECL(InitializeClone)(const Descriptor &, const Descriptor &,
+ const char *sourceFile = nullptr, int sourceLine = 0);
+
// Finalizes an object and its components. Deallocates any
// allocatable/automatic components. Does not deallocate the descriptor's
// storage.
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 2c02aa2..17b794d1 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -557,8 +557,8 @@
return lookupSymbol(sym).getAddr();
}
- fir::ExtendedValue
- symBoxToExtendedValue(const Fortran::lower::SymbolBox &symBox) {
+ fir::ExtendedValue symBoxToExtendedValue(
+ const Fortran::lower::SymbolBox &symBox) override final {
return symBox.match(
[](const Fortran::lower::SymbolBox::Intrinsic &box)
-> fir::ExtendedValue { return box.getAddr(); },
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index ff122c2..9ee42d5 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -798,6 +798,20 @@
}
}
+/// Call clone initialization runtime routine to initialize \p sym's value.
+void Fortran::lower::initializeCloneAtRuntime(
+ Fortran::lower::AbstractConverter &converter,
+ const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ mlir::Location loc = converter.getCurrentLocation();
+ fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
+ mlir::Value newBox = builder.createBox(loc, exv);
+ lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(sym);
+ fir::ExtendedValue hexv = converter.symBoxToExtendedValue(hsb);
+ mlir::Value box = builder.createBox(loc, hexv);
+ fir::runtime::genDerivedTypeInitializeClone(builder, loc, newBox, box);
+}
+
enum class VariableCleanUp { Finalize, Deallocate };
/// Check whether a local variable needs to be finalized according to clause
/// 7.5.6.3 point 3 or if it is an allocatable that must be deallocated. Note
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 99835c5..cd31253 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -116,6 +116,23 @@
*sym, /*skipDefaultInit=*/isFirstPrivate);
(void)success;
assert(success && "Privatization failed due to existing binding");
+
+ // Initialize clone from original object if it has any allocatable member.
+ auto needInitClone = [&] {
+ if (isFirstPrivate)
+ return false;
+
+ SymbolBox sb = symTable.lookupSymbol(sym);
+ assert(sb);
+ mlir::Value addr = sb.getAddr();
+ assert(addr);
+ return hlfir::mayHaveAllocatableComponent(addr.getType());
+ };
+
+ if (needInitClone()) {
+ Fortran::lower::initializeCloneAtRuntime(converter, *sym, symTable);
+ callsInitClone = true;
+ }
}
void DataSharingProcessor::copyFirstPrivateSymbol(
@@ -165,8 +182,8 @@
// variables.
// Emit implicit barrier for linear clause. Maybe on somewhere else.
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
- if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) &&
- sym->test(semantics::Symbol::Flag::OmpLastPrivate))
+ if (sym->test(semantics::Symbol::Flag::OmpLastPrivate) &&
+ (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) || callsInitClone))
return true;
}
return false;
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 2f5c69c..8c7a222 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -86,6 +86,7 @@
lower::pft::Evaluation &eval;
bool shouldCollectPreDeterminedSymbols;
bool useDelayedPrivatization;
+ bool callsInitClone = false;
lower::SymMap &symTable;
OMPConstructSymbolVisitor visitor;
diff --git a/flang/lib/Optimizer/Builder/Runtime/Derived.cpp b/flang/lib/Optimizer/Builder/Runtime/Derived.cpp
index fe7e2d1..25b4151 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Derived.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Derived.cpp
@@ -29,6 +29,21 @@
builder.create<fir::CallOp>(loc, func, args);
}
+void fir::runtime::genDerivedTypeInitializeClone(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ mlir::Value newBox,
+ mlir::Value box) {
+ auto func =
+ fir::runtime::getRuntimeFunc<mkRTKey(InitializeClone)>(loc, builder);
+ auto fTy = func.getFunctionType();
+ auto sourceFile = fir::factory::locationToFilename(builder, loc);
+ auto sourceLine =
+ fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+ auto args = fir::runtime::createArguments(builder, loc, fTy, newBox, box,
+ sourceFile, sourceLine);
+ builder.create<fir::CallOp>(loc, func, args);
+}
+
void fir::runtime::genDerivedTypeDestroy(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value box) {
auto func = fir::runtime::getRuntimeFunc<mkRTKey(Destroy)>(loc, builder);
diff --git a/flang/runtime/derived-api.cpp b/flang/runtime/derived-api.cpp
index eca784b..c8ffd8e 100644
--- a/flang/runtime/derived-api.cpp
+++ b/flang/runtime/derived-api.cpp
@@ -31,6 +31,16 @@
}
}
+void RTDEF(InitializeClone)(const Descriptor &clone, const Descriptor &orig,
+ const char *sourceFile, int sourceLine) {
+ if (const DescriptorAddendum * addendum{clone.Addendum()}) {
+ if (const auto *derived{addendum->derivedType()}) {
+ Terminator terminator{sourceFile, sourceLine};
+ InitializeClone(clone, orig, *derived, terminator);
+ }
+ }
+}
+
void RTDEF(Destroy)(const Descriptor &descriptor) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
diff --git a/flang/runtime/derived.cpp b/flang/runtime/derived.cpp
index 659f54f..7c164ff 100644
--- a/flang/runtime/derived.cpp
+++ b/flang/runtime/derived.cpp
@@ -122,6 +122,84 @@
return stat;
}
+RT_API_ATTRS int InitializeClone(const Descriptor &clone,
+ const Descriptor &orig, const typeInfo::DerivedType &derived,
+ Terminator &terminator, bool hasStat, const Descriptor *errMsg) {
+ const Descriptor &componentDesc{derived.component()};
+ std::size_t elements{orig.Elements()};
+ int stat{StatOk};
+
+ // Initialize each data component.
+ std::size_t components{componentDesc.Elements()};
+ for (std::size_t i{0}; i < components; ++i) {
+ const typeInfo::Component &comp{
+ *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(i)};
+ SubscriptValue at[maxRank];
+ orig.GetLowerBounds(at);
+ // Allocate allocatable components that are also allocated in the original
+ // object.
+ if (comp.genre() == typeInfo::Component::Genre::Allocatable) {
+ // Initialize each element.
+ for (std::size_t j{0}; j < elements; ++j, orig.IncrementSubscripts(at)) {
+ Descriptor &origDesc{
+ *orig.ElementComponent<Descriptor>(at, comp.offset())};
+ Descriptor &cloneDesc{
+ *clone.ElementComponent<Descriptor>(at, comp.offset())};
+ if (origDesc.IsAllocated()) {
+ cloneDesc.ApplyMold(origDesc, origDesc.rank());
+ stat = ReturnError(terminator, cloneDesc.Allocate(), errMsg, hasStat);
+ if (stat == StatOk) {
+ if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
+ if (const typeInfo::DerivedType *
+ derived{addendum->derivedType()}) {
+ if (!derived->noInitializationNeeded()) {
+ // Perform default initialization for the allocated element.
+ stat = Initialize(
+ cloneDesc, *derived, terminator, hasStat, errMsg);
+ }
+ // Initialize derived type's allocatables.
+ if (stat == StatOk) {
+ stat = InitializeClone(cloneDesc, origDesc, *derived,
+ terminator, hasStat, errMsg);
+ }
+ }
+ }
+ }
+ }
+ if (stat != StatOk) {
+ break;
+ }
+ }
+ } else if (comp.genre() == typeInfo::Component::Genre::Data &&
+ comp.derivedType()) {
+ // Handle nested derived types.
+ const typeInfo::DerivedType &compType{*comp.derivedType()};
+ SubscriptValue extents[maxRank];
+ GetComponentExtents(extents, comp, orig);
+ // Data components don't have descriptors, allocate them.
+ StaticDescriptor<maxRank, true, 0> origStaticDesc;
+ StaticDescriptor<maxRank, true, 0> cloneStaticDesc;
+ Descriptor &origDesc{origStaticDesc.descriptor()};
+ Descriptor &cloneDesc{cloneStaticDesc.descriptor()};
+ // Initialize each element.
+ for (std::size_t j{0}; j < elements; ++j, orig.IncrementSubscripts(at)) {
+ origDesc.Establish(compType,
+ orig.ElementComponent<char>(at, comp.offset()), comp.rank(),
+ extents);
+ cloneDesc.Establish(compType,
+ clone.ElementComponent<char>(at, comp.offset()), comp.rank(),
+ extents);
+ stat = InitializeClone(
+ cloneDesc, origDesc, compType, terminator, hasStat, errMsg);
+ if (stat != StatOk) {
+ break;
+ }
+ }
+ }
+ }
+ return stat;
+}
+
static RT_API_ATTRS const typeInfo::SpecialBinding *FindFinal(
const typeInfo::DerivedType &derived, int rank) {
if (const auto *ranked{derived.FindSpecialBinding(
diff --git a/flang/runtime/derived.h b/flang/runtime/derived.h
index b4863df..f5a1e21 100644
--- a/flang/runtime/derived.h
+++ b/flang/runtime/derived.h
@@ -26,6 +26,14 @@
RT_API_ATTRS int Initialize(const Descriptor &, const typeInfo::DerivedType &,
Terminator &, bool hasStat = false, const Descriptor *errMsg = nullptr);
+// Initializes an object clone from the original object.
+// Each allocatable member of the clone is allocated with the same bounds as
+// in the original object, if it is also allocated in it.
+// Returns a STAT= code (0 when all's well).
+RT_API_ATTRS int InitializeClone(const Descriptor &, const Descriptor &,
+ const typeInfo::DerivedType &, Terminator &, bool hasStat = false,
+ const Descriptor *errMsg = nullptr);
+
// Call FINAL subroutines, if any
RT_API_ATTRS void Finalize(
const Descriptor &, const typeInfo::DerivedType &derived, Terminator *);
diff --git a/flang/test/Lower/OpenMP/derived-type-allocatable.f90 b/flang/test/Lower/OpenMP/derived-type-allocatable.f90
new file mode 100644
index 0000000..d265954
--- /dev/null
+++ b/flang/test/Lower/OpenMP/derived-type-allocatable.f90
@@ -0,0 +1,94 @@
+! Test that derived type allocatable members of private copies are properly
+! initialized.
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+module m1
+ type x
+ integer, allocatable :: x1(:)
+ end type
+
+ type y
+ integer :: y1(10)
+ end type
+
+contains
+
+!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_nested
+!CHECK: fir.call @_FortranAInitializeClone
+!CHECK-NEXT: omp.yield
+
+!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_array_of_allocs
+!CHECK: fir.call @_FortranAInitializeClone
+!CHECK-NEXT: omp.yield
+
+!CHECK-LABEL: omp.private {type = firstprivate} @_QMm1Ftest_array
+!CHECK-NOT: fir.call @_FortranAInitializeClone
+!CHECK: omp.yield
+
+!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_array
+!CHECK: fir.call @_FortranAInitializeClone
+!CHECK-NEXT: omp.yield
+
+!CHECK-LABEL: omp.private {type = private} @_QMm1Ftest_scalar
+!CHECK: fir.call @_FortranAInitializeClone
+!CHECK-NEXT: omp.yield
+
+ subroutine test_scalar()
+ type(x) :: v
+ allocate(v%x1(5))
+
+ !$omp parallel private(v)
+ !$omp end parallel
+ end subroutine
+
+! Test omp sections lastprivate(v, v2)
+! - InitializeClone must not be called for v2, that doesn't have an
+! allocatable member.
+! - InitializeClone must be called for v, that has an allocatable member.
+! - To avoid race conditions between InitializeClone and lastprivate, a
+! barrier must be present after the initializations.
+!CHECK-LABEL: func @_QMm1Ptest_array
+!CHECK: fir.call @_FortranAInitializeClone
+!CHECK-NEXT: omp.barrier
+ subroutine test_array()
+ type(x) :: v(10)
+ type(y) :: v2(10)
+ allocate(v(1)%x1(5))
+
+ !$omp parallel private(v)
+ !$omp end parallel
+
+ !$omp parallel
+ !$omp sections lastprivate(v2, v)
+ !$omp end sections
+ !$omp end parallel
+
+ !$omp parallel firstprivate(v)
+ !$omp end parallel
+ end subroutine
+
+ subroutine test_array_of_allocs()
+ type(x), allocatable :: v(:)
+ allocate(v(10))
+ allocate(v(1)%x1(5))
+
+ !$omp parallel private(v)
+ !$omp end parallel
+ end subroutine
+
+ subroutine test_nested()
+ type dt1
+ integer, allocatable :: a(:)
+ end type
+
+ type dt2
+ type(dt1) :: d1
+ end type
+
+ type(dt2) :: d2
+ allocate(d2%d1%a(10))
+
+ !$omp parallel private(d2)
+ !$omp end parallel
+ end subroutine
+end module