[flang][runtime] Further work on speeding up work queue operations (#149189)
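When finalization and destruction actions during assignment on a CPU
are handled without enqueueing another task, this patch avoids a trip
through the work queue engine: the ticket's temporary descriptor state
is now marked persistent only when a pending operation actually
returns StatContinue, rather than unconditionally.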
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index d642ed5..7cf4147 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -279,13 +279,15 @@
if (mustDeallocateLHS) {
// Convert the LHS into a temporary, then make it look deallocated.
toDeallocate_ = &tempDescriptor_.descriptor();
- persist_ = true; // tempDescriptor_ state must outlive child tickets
std::memcpy(
reinterpret_cast<void *>(toDeallocate_), &to_, to_.SizeInBytes());
to_.set_base_addr(nullptr);
if (toDerived_ && (flags_ & NeedFinalization)) {
- if (int status{workQueue.BeginFinalize(*toDeallocate_, *toDerived_)};
- status != StatOk && status != StatContinue) {
+ int status{workQueue.BeginFinalize(*toDeallocate_, *toDerived_)};
+ if (status == StatContinue) {
+ // tempDescriptor_ state must outlive pending child ticket
+ persist_ = true;
+ } else if (status != StatOk) {
return status;
}
flags_ &= ~NeedFinalization;
@@ -304,6 +306,9 @@
if (int stat{ReturnError(
workQueue.terminator(), newFrom.Allocate(kNoAsyncObject))};
stat != StatOk) {
+ if (stat == StatContinue) {
+ persist_ = true;
+ }
return stat;
}
if (HasDynamicComponent(*from_)) {
@@ -507,6 +512,7 @@
}
}
if (persist_) {
+ // tempDescriptor_ must outlive pending child ticket(s)
done_ = true;
return StatContinue;
} else {