[flang][cuda] Add asyncId to allocate entry point (#134947)

diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h
index dcdfe07..9907e78 100644
--- a/flang-rt/include/flang-rt/runtime/descriptor.h
+++ b/flang-rt/include/flang-rt/runtime/descriptor.h
@@ -29,6 +29,9 @@
 #include <cstdio>
 #include <cstring>
 
+/// Value used for asyncId when no specific stream is specified.
+static constexpr std::int64_t kNoAsyncId = -1;
+
 namespace Fortran::runtime {
 
 class Terminator;
@@ -369,7 +372,7 @@
   // before calling.  It (re)computes the byte strides after
   // allocation.  Does not allocate automatic components or
   // perform default component initialization.
-  RT_API_ATTRS int Allocate();
+  RT_API_ATTRS int Allocate(std::int64_t asyncId);
   RT_API_ATTRS void SetByteStrides();
 
   // Deallocates storage; does not call FINAL subroutines or
diff --git a/flang-rt/include/flang-rt/runtime/reduction-templates.h b/flang-rt/include/flang-rt/runtime/reduction-templates.h
index 8c6f838..77f77a5 100644
--- a/flang-rt/include/flang-rt/runtime/reduction-templates.h
+++ b/flang-rt/include/flang-rt/runtime/reduction-templates.h
@@ -347,7 +347,7 @@
     // as the element size of the source.
     result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
         CFI_attribute_allocatable);
-    if (int stat{result.Allocate()}) {
+    if (int stat{result.Allocate(kNoAsyncId)}) {
       terminator.Crash(
           "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
     }
diff --git a/flang-rt/lib/cuda/allocatable.cpp b/flang-rt/lib/cuda/allocatable.cpp
index b773e80..432974d 100644
--- a/flang-rt/lib/cuda/allocatable.cpp
+++ b/flang-rt/lib/cuda/allocatable.cpp
@@ -53,7 +53,7 @@
   }
   // Perform the standard allocation.
   int stat{RTNAME(AllocatableAllocate)(
-      desc, hasStat, errMsg, sourceFile, sourceLine)};
+      desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
   if (pinned) {
     // Set pinned according to stat. More infrastructre is needed to set it
     // closer to the actual allocation call.
diff --git a/flang-rt/lib/runtime/allocatable.cpp b/flang-rt/lib/runtime/allocatable.cpp
index a518161..6acce34 100644
--- a/flang-rt/lib/runtime/allocatable.cpp
+++ b/flang-rt/lib/runtime/allocatable.cpp
@@ -133,15 +133,17 @@
   }
 }
 
-int RTDEF(AllocatableAllocate)(Descriptor &descriptor, bool hasStat,
-    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
+    bool hasStat, const Descriptor *errMsg, const char *sourceFile,
+    int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   if (!descriptor.IsAllocatable()) {
     return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
   } else if (descriptor.IsAllocated()) {
     return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
   } else {
-    int stat{ReturnError(terminator, descriptor.Allocate(), errMsg, hasStat)};
+    int stat{
+        ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
     if (stat == StatOk) {
       if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
         if (const auto *derived{addendum->derivedType()}) {
@@ -160,7 +162,7 @@
     const Descriptor &source, bool hasStat, const Descriptor *errMsg,
     const char *sourceFile, int sourceLine) {
   int stat{RTNAME(AllocatableAllocate)(
-      alloc, hasStat, errMsg, sourceFile, sourceLine)};
+      alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
   if (stat == StatOk) {
     Terminator terminator{sourceFile, sourceLine};
     DoFromSourceAssign(alloc, source, terminator);
diff --git a/flang-rt/lib/runtime/array-constructor.cpp b/flang-rt/lib/runtime/array-constructor.cpp
index 7e267e7..67b3b5e 100644
--- a/flang-rt/lib/runtime/array-constructor.cpp
+++ b/flang-rt/lib/runtime/array-constructor.cpp
@@ -50,8 +50,8 @@
           initialAllocationSize(fromElements, to.ElementBytes())};
       to.GetDimension(0).SetBounds(1, allocationSize);
       RTNAME(AllocatableAllocate)
-      (to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
-          vector.sourceLine);
+      (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+          vector.sourceFile, vector.sourceLine);
       to.GetDimension(0).SetBounds(1, fromElements);
       vector.actualAllocationSize = allocationSize;
     } else {
@@ -59,8 +59,8 @@
       // first value: there should be no reallocation.
       RUNTIME_CHECK(terminator, previousToElements >= fromElements);
       RTNAME(AllocatableAllocate)
-      (to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
-          vector.sourceLine);
+      (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+          vector.sourceFile, vector.sourceLine);
       vector.actualAllocationSize = previousToElements;
     }
   } else {
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index e919aea4..4a813cd 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -99,7 +99,7 @@
     toDim.SetByteStride(stride);
     stride *= toDim.Extent();
   }
-  int result{ReturnError(terminator, to.Allocate())};
+  int result{ReturnError(terminator, to.Allocate(kNoAsyncId))};
   if (result == StatOk && derived && !derived->noInitializationNeeded()) {
     result = ReturnError(terminator, Initialize(to, *derived, terminator));
   }
@@ -277,7 +277,7 @@
       // entity, otherwise, the Deallocate() below will not
       // free the descriptor memory.
       newFrom.raw().attribute = CFI_attribute_allocatable;
-      auto stat{ReturnError(terminator, newFrom.Allocate())};
+      auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))};
       if (stat == StatOk) {
         if (HasDynamicComponent(from)) {
           // If 'from' has allocatable/automatic component, we cannot
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 10cf27c..c890fd0 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -117,7 +117,7 @@
   for (int j{0}; j < rank; ++j) {
     result.GetDimension(j).SetBounds(1, ub[j]);
   }
-  if (result.Allocate() != CFI_SUCCESS) {
+  if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
     terminator.Crash("Compare: could not allocate storage for result");
   }
   std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
@@ -172,7 +172,7 @@
   for (int j{0}; j < rank; ++j) {
     result.GetDimension(j).SetBounds(1, ub[j]);
   }
-  if (result.Allocate() != CFI_SUCCESS) {
+  if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
     terminator.Crash("ADJUSTL/R: could not allocate storage for result");
   }
   for (SubscriptValue resultAt{0}; elements-- > 0;
@@ -226,7 +226,7 @@
   for (int j{0}; j < rank; ++j) {
     result.GetDimension(j).SetBounds(1, ub[j]);
   }
-  if (result.Allocate() != CFI_SUCCESS) {
+  if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
     terminator.Crash("LEN_TRIM: could not allocate storage for result");
   }
   std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -408,7 +408,7 @@
   for (int j{0}; j < rank; ++j) {
     result.GetDimension(j).SetBounds(1, ub[j]);
   }
-  if (result.Allocate() != CFI_SUCCESS) {
+  if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
     terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
   }
   std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -511,7 +511,7 @@
     for (int j{0}; j < rank; ++j) {
       accumulator.GetDimension(j).SetBounds(1, ub[j]);
     }
-    RUNTIME_CHECK(terminator, accumulator.Allocate() == CFI_SUCCESS);
+    RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
   }
   for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
        accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
@@ -587,7 +587,7 @@
   for (int j{0}; j < rank; ++j) {
     accumulator.GetDimension(j).SetBounds(1, ub[j]);
   }
-  if (accumulator.Allocate() != CFI_SUCCESS) {
+  if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) {
     terminator.Crash(
         "CharacterConcatenate: could not allocate storage for result");
   }
@@ -610,7 +610,7 @@
   accumulator.set_base_addr(nullptr);
   std::size_t oldLen{accumulator.ElementBytes()};
   accumulator.raw().elem_len += chars;
-  RUNTIME_CHECK(terminator, accumulator.Allocate() == CFI_SUCCESS);
+  RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
   std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
   FreeMemory(old);
 }
@@ -812,7 +812,7 @@
   std::size_t origBytes{string.ElementBytes()};
   result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
       CFI_attribute_allocatable);
-  if (result.Allocate() != CFI_SUCCESS) {
+  if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
     terminator.Crash("REPEAT could not allocate storage for result");
   }
   const char *from{string.OffsetElement()};
@@ -846,7 +846,7 @@
   }
   result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
       CFI_attribute_allocatable);
-  RUNTIME_CHECK(terminator, result.Allocate() == CFI_SUCCESS);
+  RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS);
   std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
 }
 
diff --git a/flang-rt/lib/runtime/copy.cpp b/flang-rt/lib/runtime/copy.cpp
index 5956642..3a0f98c 100644
--- a/flang-rt/lib/runtime/copy.cpp
+++ b/flang-rt/lib/runtime/copy.cpp
@@ -171,7 +171,8 @@
                 *reinterpret_cast<Descriptor *>(toPtr + component->offset())};
             if (toDesc.raw().base_addr != nullptr) {
               toDesc.set_base_addr(nullptr);
-              RUNTIME_CHECK(terminator, toDesc.Allocate() == CFI_SUCCESS);
+              RUNTIME_CHECK(
+                  terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
               const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
                   fromPtr + component->offset())};
               copyStack.emplace(toDesc, fromDesc);
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index 87e4b29..c46ea80 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -51,7 +51,8 @@
         comp.EstablishDescriptor(allocDesc, instance, terminator);
         allocDesc.raw().attribute = CFI_attribute_allocatable;
         if (comp.genre() == typeInfo::Component::Genre::Automatic) {
-          stat = ReturnError(terminator, allocDesc.Allocate(), errMsg, hasStat);
+          stat = ReturnError(
+              terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat);
           if (stat == StatOk) {
             if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
               if (const auto *derived{addendum->derivedType()}) {
@@ -151,7 +152,8 @@
             *clone.ElementComponent<Descriptor>(at, comp.offset())};
         if (origDesc.IsAllocated()) {
           cloneDesc.ApplyMold(origDesc, origDesc.rank());
-          stat = ReturnError(terminator, cloneDesc.Allocate(), errMsg, hasStat);
+          stat = ReturnError(
+              terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat);
           if (stat == StatOk) {
             if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
               if (const typeInfo::DerivedType *
@@ -258,7 +260,7 @@
         copy.raw().attribute = CFI_attribute_allocatable;
         Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
         RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
-            copy.Allocate() == CFI_SUCCESS);
+            copy.Allocate(kNoAsyncId) == CFI_SUCCESS);
         ShallowCopyDiscontiguousToContiguous(copy, descriptor);
         argDescriptor = &copy;
       }
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index c660d4f..3debf53 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -158,7 +158,7 @@
 #endif
 }
 
-RT_API_ATTRS int Descriptor::Allocate() {
+RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
   std::size_t elementBytes{ElementBytes()};
   if (static_cast<std::int64_t>(elementBytes) < 0) {
     // F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
@@ -170,7 +170,7 @@
   // Zero size allocation is possible in Fortran and the resulting
   // descriptor must be allocated/associated. Since std::malloc(0)
   // result is implementation defined, always allocate at least one byte.
-  void *p{alloc(byteSize ? byteSize : 1, /*asyncId=*/-1)};
+  void *p{alloc(byteSize ? byteSize : 1, asyncId)};
   if (!p) {
     return CFI_ERROR_MEM_ALLOCATION;
   }
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp
index 3d84daa..4c7f8e8b 100644
--- a/flang-rt/lib/runtime/extrema.cpp
+++ b/flang-rt/lib/runtime/extrema.cpp
@@ -152,7 +152,7 @@
       CFI_attribute_allocatable);
   result.GetDimension(0).SetBounds(1, extent[0]);
   Terminator terminator{source, line};
-  if (int stat{result.Allocate()}) {
+  if (int stat{result.Allocate(kNoAsyncId)}) {
     terminator.Crash(
         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
   }
@@ -181,7 +181,7 @@
       CFI_attribute_allocatable);
   result.GetDimension(0).SetBounds(1, extent[0]);
   Terminator terminator{source, line};
-  if (int stat{result.Allocate()}) {
+  if (int stat{result.Allocate(kNoAsyncId)}) {
     terminator.Crash(
         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
   }
diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp
index 95986ae..e3e9895 100644
--- a/flang-rt/lib/runtime/findloc.cpp
+++ b/flang-rt/lib/runtime/findloc.cpp
@@ -220,7 +220,7 @@
       CFI_attribute_allocatable);
   result.GetDimension(0).SetBounds(1, extent[0]);
   Terminator terminator{source, line};
-  if (int stat{result.Allocate()}) {
+  if (int stat{result.Allocate(kNoAsyncId)}) {
     terminator.Crash(
         "FINDLOC: could not allocate memory for result; STAT=%d", stat);
   }
diff --git a/flang-rt/lib/runtime/matmul-transpose.cpp b/flang-rt/lib/runtime/matmul-transpose.cpp
index 8666167..17987fb 100644
--- a/flang-rt/lib/runtime/matmul-transpose.cpp
+++ b/flang-rt/lib/runtime/matmul-transpose.cpp
@@ -183,7 +183,7 @@
     for (int j{0}; j < resRank; ++j) {
       result.GetDimension(j).SetBounds(1, extent[j]);
     }
-    if (int stat{result.Allocate()}) {
+    if (int stat{result.Allocate(kNoAsyncId)}) {
       terminator.Crash(
           "MATMUL-TRANSPOSE: could not allocate memory for result; STAT=%d",
           stat);
diff --git a/flang-rt/lib/runtime/matmul.cpp b/flang-rt/lib/runtime/matmul.cpp
index 693e510..0ff92ce 100644
--- a/flang-rt/lib/runtime/matmul.cpp
+++ b/flang-rt/lib/runtime/matmul.cpp
@@ -255,7 +255,7 @@
     for (int j{0}; j < resRank; ++j) {
       result.GetDimension(j).SetBounds(1, extent[j]);
     }
-    if (int stat{result.Allocate()}) {
+    if (int stat{result.Allocate(kNoAsyncId)}) {
       terminator.Crash(
           "MATMUL: could not allocate memory for result; STAT=%d", stat);
     }
diff --git a/flang-rt/lib/runtime/misc-intrinsic.cpp b/flang-rt/lib/runtime/misc-intrinsic.cpp
index b7335e9..2fde859 100644
--- a/flang-rt/lib/runtime/misc-intrinsic.cpp
+++ b/flang-rt/lib/runtime/misc-intrinsic.cpp
@@ -30,7 +30,7 @@
   if (const DescriptorAddendum * addendum{mold.Addendum()}) {
     *result.Addendum() = *addendum;
   }
-  if (int stat{result.Allocate()}) {
+  if (int stat{result.Allocate(kNoAsyncId)}) {
     Terminator{sourceFile, line}.Crash(
         "TRANSFER: could not allocate memory for result; STAT=%d", stat);
   }
diff --git a/flang-rt/lib/runtime/temporary-stack.cpp b/flang-rt/lib/runtime/temporary-stack.cpp
index ea89d0c..3a952b1 100644
--- a/flang-rt/lib/runtime/temporary-stack.cpp
+++ b/flang-rt/lib/runtime/temporary-stack.cpp
@@ -148,7 +148,7 @@
   if constexpr (COPY_VALUES) {
     // copy the data pointed to by the box
     box.set_base_addr(nullptr);
-    box.Allocate();
+    box.Allocate(kNoAsyncId);
     RTNAME(AssignTemporary)
     (box, source, terminator_.sourceFileName(), terminator_.sourceLine());
   }
diff --git a/flang-rt/lib/runtime/tools.cpp b/flang-rt/lib/runtime/tools.cpp
index b9d9ca4..5d6e35f 100644
--- a/flang-rt/lib/runtime/tools.cpp
+++ b/flang-rt/lib/runtime/tools.cpp
@@ -261,7 +261,7 @@
   for (int j{0}; j + 1 < xRank; ++j) {
     result.GetDimension(j).SetBounds(1, resultExtent[j]);
   }
-  if (int stat{result.Allocate()}) {
+  if (int stat{result.Allocate(kNoAsyncId)}) {
     terminator.Crash(
         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
   }
diff --git a/flang-rt/lib/runtime/transformational.cpp b/flang-rt/lib/runtime/transformational.cpp
index ee3686a..a7d5a48 100644
--- a/flang-rt/lib/runtime/transformational.cpp
+++ b/flang-rt/lib/runtime/transformational.cpp
@@ -132,7 +132,7 @@
   for (int j{0}; j < rank; ++j) {
     result.GetDimension(j).SetBounds(1, extent[j]);
   }
-  if (int stat{result.Allocate()}) {
+  if (int stat{result.Allocate(kNoAsyncId)}) {
     terminator.Crash(
         "%s: Could not allocate memory for result (stat=%d)", function, stat);
   }
@@ -157,7 +157,7 @@
   for (int j{0}; j < rank; ++j) {
     result.GetDimension(j).SetBounds(1, extent[j]);
   }
-  if (int stat{result.Allocate()}) {
+  if (int stat{result.Allocate(kNoAsyncId)}) {
     terminator.Crash(
         "%s: Could not allocate memory for result (stat=%d)", function, stat);
   }
diff --git a/flang-rt/unittests/Evaluate/reshape.cpp b/flang-rt/unittests/Evaluate/reshape.cpp
index 2abe46c..67a0be1 100644
--- a/flang-rt/unittests/Evaluate/reshape.cpp
+++ b/flang-rt/unittests/Evaluate/reshape.cpp
@@ -26,7 +26,7 @@
   for (int j{0}; j < 3; ++j) {
     source->GetDimension(j).SetBounds(1, sourceExtent[j]);
   }
-  TEST(source->Allocate() == CFI_SUCCESS);
+  TEST(source->Allocate(kNoAsyncId) == CFI_SUCCESS);
   TEST(source->IsAllocated());
   MATCH(2, source->GetDimension(0).Extent());
   MATCH(3, source->GetDimension(1).Extent());
diff --git a/flang-rt/unittests/Runtime/Allocatable.cpp b/flang-rt/unittests/Runtime/Allocatable.cpp
index 4702f48..a6fcdd0 100644
--- a/flang-rt/unittests/Runtime/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/Allocatable.cpp
@@ -26,7 +26,7 @@
   auto b{createAllocatable(TypeCategory::Integer, 4)};
   // ALLOCATE(a(20))
   a->GetDimension(0).SetBounds(1, 20);
-  a->Allocate();
+  a->Allocate(kNoAsyncId);
 
   EXPECT_TRUE(a->IsAllocated());
   EXPECT_FALSE(b->IsAllocated());
@@ -46,7 +46,7 @@
   // move_alloc with errMsg
   auto errMsg{Descriptor::Create(
       sizeof(char), 64, nullptr, 0, nullptr, CFI_attribute_allocatable)};
-  errMsg->Allocate();
+  errMsg->Allocate(kNoAsyncId);
   RTNAME(MoveAlloc)(*b, *a, nullptr, false, errMsg.get(), __FILE__, __LINE__);
   EXPECT_FALSE(a->IsAllocated());
   EXPECT_TRUE(b->IsAllocated());
diff --git a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
index 3f759a6..1c8ded0 100644
--- a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
@@ -42,7 +42,7 @@
   CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes()));
 
   RTNAME(AllocatableAllocate)
-  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  (*a, kNoAsyncId, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
   EXPECT_TRUE(a->IsAllocated());
   RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__);
   cudaDeviceSynchronize();
diff --git a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 9bda327..2f1dc64 100644
--- a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -35,7 +35,8 @@
   EXPECT_FALSE(a->HasAddendum());
   RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
   RTNAME(AllocatableAllocate)
-  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+      __LINE__);
   EXPECT_TRUE(a->IsAllocated());
   RTNAME(AllocatableDeallocate)
   (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
@@ -53,7 +54,8 @@
   EXPECT_FALSE(a->HasAddendum());
   RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
   RTNAME(AllocatableAllocate)
-  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+      __LINE__);
   EXPECT_TRUE(a->IsAllocated());
   RTNAME(AllocatableDeallocate)
   (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
diff --git a/flang-rt/unittests/Runtime/CUDA/Memory.cpp b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
index 37ae59e..b361207 100644
--- a/flang-rt/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
@@ -50,7 +50,8 @@
   EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
   RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
   RTNAME(AllocatableAllocate)
-  (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  (*dev, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+      __LINE__);
   EXPECT_TRUE(dev->IsAllocated());
 
   // Create temp array to transfer to device.
diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 83ec9b3..c158e0c 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -35,7 +35,7 @@
   for (int j{0}; j < rank; ++j) {
     descriptor->GetDimension(j).SetBounds(2, shape[j] + 1);
   }
-  if (descriptor->Allocate() != 0) {
+  if (descriptor->Allocate(kNoAsyncId) != 0) {
     return nullptr;
   }
 
diff --git a/flang-rt/unittests/Runtime/CommandTest.cpp b/flang-rt/unittests/Runtime/CommandTest.cpp
index 72fe762..9d0da4c 100644
--- a/flang-rt/unittests/Runtime/CommandTest.cpp
+++ b/flang-rt/unittests/Runtime/CommandTest.cpp
@@ -26,7 +26,7 @@
 static OwningPtr<Descriptor> CreateEmptyCharDescriptor() {
   OwningPtr<Descriptor> descriptor{Descriptor::Create(
       sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
-  if (descriptor->Allocate() != 0) {
+  if (descriptor->Allocate(kNoAsyncId) != 0) {
     return nullptr;
   }
   return descriptor;
@@ -36,7 +36,7 @@
   std::size_t n{std::strlen(value)};
   OwningPtr<Descriptor> descriptor{Descriptor::Create(
       sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
-  if (descriptor->Allocate() != 0) {
+  if (descriptor->Allocate(kNoAsyncId) != 0) {
     return nullptr;
   }
   std::memcpy(descriptor->OffsetElement(), value, n);
@@ -47,7 +47,7 @@
 static OwningPtr<Descriptor> EmptyIntDescriptor() {
   OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
       kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
-  if (descriptor->Allocate() != 0) {
+  if (descriptor->Allocate(kNoAsyncId) != 0) {
     return nullptr;
   }
   return descriptor;
@@ -57,7 +57,7 @@
 static OwningPtr<Descriptor> IntDescriptor(const int &value) {
   OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
       kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
-  if (descriptor->Allocate() != 0) {
+  if (descriptor->Allocate(kNoAsyncId) != 0) {
     return nullptr;
   }
   std::memcpy(descriptor->OffsetElement<int>(), &value, sizeof(int));
diff --git a/flang-rt/unittests/Runtime/TemporaryStack.cpp b/flang-rt/unittests/Runtime/TemporaryStack.cpp
index ff8efe1..3291794 100644
--- a/flang-rt/unittests/Runtime/TemporaryStack.cpp
+++ b/flang-rt/unittests/Runtime/TemporaryStack.cpp
@@ -59,7 +59,7 @@
   Descriptor &outputDesc2{testDescriptorStorage[2].descriptor()};
   inputDesc.Establish(code, elementBytes, descriptorPtr, rank, extent);
 
-  inputDesc.Allocate();
+  inputDesc.Allocate(kNoAsyncId);
   ASSERT_EQ(inputDesc.IsAllocated(), true);
   uint32_t *inputData = static_cast<uint32_t *>(inputDesc.raw().base_addr);
   for (std::size_t i = 0; i < inputDesc.Elements(); ++i) {
@@ -123,7 +123,7 @@
       boxDims.extent = extent[dim];
       boxDims.sm = elementBytes;
     }
-    desc->Allocate();
+    desc->Allocate(kNoAsyncId);
 
     // fill the array with some data to test
     for (uint32_t i = 0; i < desc->Elements(); ++i) {
diff --git a/flang-rt/unittests/Runtime/tools.h b/flang-rt/unittests/Runtime/tools.h
index 36a4c2b..a1eba45 100644
--- a/flang-rt/unittests/Runtime/tools.h
+++ b/flang-rt/unittests/Runtime/tools.h
@@ -42,7 +42,7 @@
   for (int j{0}; j < rank; ++j) {
     result->GetDimension(j).SetBounds(1, shape[j]);
   }
-  int stat{result->Allocate()};
+  int stat{result->Allocate(kNoAsyncId)};
   EXPECT_EQ(stat, 0) << stat;
   EXPECT_LE(data.size(), result->Elements());
   char *p{result->OffsetElement<char>()};
diff --git a/flang/include/flang/Runtime/allocatable.h b/flang/include/flang/Runtime/allocatable.h
index 714d85e..6895f8a 100644
--- a/flang/include/flang/Runtime/allocatable.h
+++ b/flang/include/flang/Runtime/allocatable.h
@@ -94,9 +94,9 @@
 // Successfully allocated memory is initialized if the allocatable has a
 // derived type, and is always initialized by AllocatableAllocateSource().
 // Performs all necessary coarray synchronization and validation actions.
-int RTDECL(AllocatableAllocate)(Descriptor &, bool hasStat = false,
-    const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
-    int sourceLine = 0);
+int RTDECL(AllocatableAllocate)(Descriptor &, std::int64_t asyncId = -1,
+    bool hasStat = false, const Descriptor *errMsg = nullptr,
+    const char *sourceFile = nullptr, int sourceLine = 0);
 int RTDECL(AllocatableAllocateSource)(Descriptor &, const Descriptor &source,
     bool hasStat = false, const Descriptor *errMsg = nullptr,
     const char *sourceFile = nullptr, int sourceLine = 0);
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 9938bd5..8d0444a 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -186,9 +186,14 @@
           ? fir::runtime::getRuntimeFunc<mkRTKey(PointerAllocate)>(loc, builder)
           : fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc,
                                                                        builder);
-  llvm::SmallVector<mlir::Value> args{
-      box.getAddr(), errorManager.hasStat, errorManager.errMsgAddr,
-      errorManager.sourceFile, errorManager.sourceLine};
+  llvm::SmallVector<mlir::Value> args{box.getAddr()};
+  if (!box.isPointer())
+    args.push_back(
+        builder.createIntegerConstant(loc, builder.getI64Type(), -1));
+  args.push_back(errorManager.hasStat);
+  args.push_back(errorManager.errMsgAddr);
+  args.push_back(errorManager.sourceFile);
+  args.push_back(errorManager.sourceLine);
   llvm::SmallVector<mlir::Value> operands;
   for (auto [fst, snd] : llvm::zip(args, callee.getFunctionType().getInputs()))
     operands.emplace_back(builder.createConvert(loc, snd, fst));
diff --git a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
index 70a88ff..28452d3 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
@@ -76,16 +76,19 @@
   mlir::func::FuncOp func{
       fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc, builder)};
   mlir::FunctionType fTy{func.getFunctionType()};
+  mlir::Value asyncId =
+      builder.createIntegerConstant(loc, builder.getI64Type(), -1);
   mlir::Value sourceFile{fir::factory::locationToFilename(builder, loc)};
   mlir::Value sourceLine{
-      fir::factory::locationToLineNo(builder, loc, fTy.getInput(4))};
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(5))};
   if (!hasStat)
     hasStat = builder.createBool(loc, false);
   if (!errMsg) {
     mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType());
     errMsg = builder.create<fir::AbsentOp>(loc, boxNoneTy).getResult();
   }
-  llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
-      builder, loc, fTy, desc, hasStat, errMsg, sourceFile, sourceLine)};
+  llvm::SmallVector<mlir::Value> args{
+      fir::runtime::createArguments(builder, loc, fTy, desc, asyncId, hasStat,
+                                    errMsg, sourceFile, sourceLine)};
   builder.create<fir::CallOp>(loc, func, args);
 }
diff --git a/flang/test/HLFIR/elemental-codegen.fir b/flang/test/HLFIR/elemental-codegen.fir
index c05c05c..a715479 100644
--- a/flang/test/HLFIR/elemental-codegen.fir
+++ b/flang/test/HLFIR/elemental-codegen.fir
@@ -191,7 +191,7 @@
 // CHECK:           %[[VAL_35:.*]] = fir.absent !fir.box<none>
 // CHECK:           %[[VAL_36:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK:           %[[VAL_37:.*]] = fir.convert %[[VAL_31]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:           %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK:           %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 // CHECK:           %[[VAL_12:.*]] = arith.constant true
 // CHECK:           %[[VAL_39:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
 // CHECK:           %[[VAL_40:.*]] = arith.constant 1 : index
@@ -275,7 +275,7 @@
 // CHECK:           %[[VAL_36:.*]] = fir.absent !fir.box<none>
 // CHECK:           %[[VAL_37:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK:           %[[VAL_38:.*]] = fir.convert %[[VAL_32]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:           %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK:           %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 // CHECK:           %[[VAL_13:.*]] = arith.constant true
 // CHECK:           %[[VAL_40:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
 // CHECK:           %[[VAL_41:.*]] = arith.constant 1 : index
@@ -328,7 +328,7 @@
 // CHECK:           %[[VAL_85:.*]] = fir.absent !fir.box<none>
 // CHECK:           %[[VAL_86:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK:           %[[VAL_87:.*]] = fir.convert %[[VAL_81]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:           %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK:           %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 // CHECK:           %[[VAL_62:.*]] = arith.constant true
 // CHECK:           %[[VAL_89:.*]] = fir.load %[[VAL_63]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
 // CHECK:           %[[VAL_90:.*]] = arith.constant 1 : index
diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
index 17e1973..950dc30 100644
--- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
@@ -473,6 +473,6 @@
 end module
 
 ! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit()
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 ! CHECK: fir.if
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90
index 05d1dc2..889cdef 100644
--- a/flang/test/Lower/OpenACC/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/acc-declare.f90
@@ -434,6 +434,6 @@
 end module
 
 ! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit()
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 ! CHECK: fir.if
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90
index 77ad9c1..dd8671d 100644
--- a/flang/test/Lower/allocatable-polymorphic.f90
+++ b/flang/test/Lower/allocatable-polymorphic.f90
@@ -267,7 +267,7 @@
 ! CHECK: %[[C0:.*]] = arith.constant 0 : i32
 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[P_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> ()
 ! CHECK: %[[P_CAST:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 ! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}>
 ! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
@@ -276,7 +276,7 @@
 ! CHECK: %[[C0:.*]] = arith.constant 0 : i32
 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C1_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> ()
 ! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 ! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>
 ! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
@@ -285,7 +285,7 @@
 ! CHECK: %[[C0:.*]] = arith.constant 0 : i32
 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C2_CAST]], %[[TYPE_DESC_P2_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> ()
 ! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 ! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}>
 ! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
@@ -300,7 +300,7 @@
 ! CHECK: %[[C10_I64:.*]] = fir.convert %[[C10]] : (i32) -> i64
 ! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[C3_CAST]], %[[C0]], %[[C1_I64]], %[[C10_I64]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
 ! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 ! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>
 ! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
@@ -316,7 +316,7 @@
 ! CHECK: %[[C20_I64:.*]] = fir.convert %[[C20]] : (i32) -> i64
 ! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[C4_CAST]], %[[C0]], %[[C1_I64]], %[[C20_I64]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
 ! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 ! CHECK: %[[C1_LOAD1:.*]] = fir.load %[[C1_DECL]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>
 ! CHECK: fir.dispatch "proc1"(%[[C1_LOAD1]] : !fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>)
@@ -390,7 +390,7 @@
 ! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32
 ! CHECK: fir.call @_FortranAAllocatableInitIntrinsicForAllocate(%[[BOX_NONE]], %[[CAT]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, i32, i32, i32, i32) -> ()
 ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<none>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[PTR_DECL]]#0 : (!fir.ref<!fir.class<!fir.ptr<none>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK: %[[CAT:.*]] = arith.constant 2 : i32
@@ -573,7 +573,7 @@
 ! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32
 ! CHECK: fir.call @_FortranAAllocatableInitCharacterForAllocate(%[[A_NONE]], %[[LEN]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i32, i32, i32) -> ()
 ! CHECK: %[[A_NONE:.*]] = fir.convert %[[A_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<none>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 end module
 
@@ -592,17 +592,17 @@
 ! LLVM-LABEL: define void @_QMpolyPtest_allocatable()
 
 ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
 ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
 ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
 ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 1, i32 0)
 ! LLVM: call void @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 10)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
 ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 1, i32 0)
 ! LLVM: call void @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 20)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
 ! LLVM-COUNT-2:  call void %{{[0-9]*}}()
 
 ! LLVM: call void @llvm.memcpy.p0.p0.i32
@@ -683,5 +683,5 @@
 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 8, i32 20240719, i8 0, i8 42, i8 2, i8 1, ptr @_QMpolyEXdtXp1, [1 x i64] zeroinitializer }, ptr %[[ALLOCA1:[0-9]*]]
 ! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA2:[0-9]+]], ptr %[[ALLOCA1]], i32 40, i1 false)
 ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %[[ALLOCA2]], ptr @_QMpolyEXdtXp1, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
 ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableDeallocatePolymorphic(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
diff --git a/flang/test/Lower/allocatable-runtime.f90 b/flang/test/Lower/allocatable-runtime.f90
index 9670a1e..37272c9 100644
--- a/flang/test/Lower/allocatable-runtime.f90
+++ b/flang/test/Lower/allocatable-runtime.f90
@@ -31,7 +31,7 @@
   ! CHECK: fir.call @{{.*}}AllocatableSetBounds(%[[xBoxCast2]], %c0{{.*}}, %[[xlbCast]], %[[xubCast]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
   ! CHECK-DAG: %[[xBoxCast3:.*]] = fir.convert %[[xBoxAddr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
   ! CHECK-DAG: %[[sourceFile:.*]] = fir.convert %{{.*}} -> !fir.ref<i8>
-  ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+  ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %{{.*}}, %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
   ! Simply check that we are emitting the right numebr of set bound for y and z. Otherwise, this is just like x.
   ! CHECK: fir.convert %[[yBoxAddr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
@@ -180,4 +180,4 @@
 ! CHECK: %[[M_BOX_NONE:.*]] = fir.convert %[[EMBOX_M]] : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
 ! CHECK: fir.call @_FortranAAllocatableApplyMold(%[[A_BOX_NONE]], %[[M_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> ()
 ! CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/allocate-mold.f90 b/flang/test/Lower/allocate-mold.f90
index e50861a..c7985b1 100644
--- a/flang/test/Lower/allocate-mold.f90
+++ b/flang/test/Lower/allocate-mold.f90
@@ -16,7 +16,7 @@
 ! CHECK: %[[A_REF_BOX_NONE1:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK: fir.call @_FortranAAllocatableApplyMold(%[[A_REF_BOX_NONE1]], %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> ()
 ! CHECK: %[[A_REF_BOX_NONE2:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
 subroutine array_scalar_mold_allocation()
   real, allocatable :: a(:)
@@ -40,4 +40,4 @@
 ! CHECK: %[[REF_BOX_A1:.*]] = fir.convert %1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[REF_BOX_A1]], {{.*}},{{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
 ! CHECK: %[[REF_BOX_A2:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90
index 10793d8..485861a 100644
--- a/flang/test/Lower/polymorphic.f90
+++ b/flang/test/Lower/polymorphic.f90
@@ -1149,7 +1149,7 @@
 ! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "test"} {
 ! CHECK: %[[ADDR_O:.*]] = fir.address_of(@_QFEo) : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>
 ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ADDR_O]] : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 ! CHECK: %[[O:.*]] = fir.load %[[ADDR_O]] : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>
 ! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], inner : (!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>) -> !fir.ref<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>
 ! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %{{.*}}) -> (!fir.array<5x!fir.logical<4>>) {
diff --git a/flang/test/Transforms/lower-repack-arrays.fir b/flang/test/Transforms/lower-repack-arrays.fir
index 7317d8f..bbae7ba 100644
--- a/flang/test/Transforms/lower-repack-arrays.fir
+++ b/flang/test/Transforms/lower-repack-arrays.fir
@@ -840,7 +840,7 @@
 // CHECK:               %[[VAL_34:.*]] = fir.absent !fir.box<none>
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 // CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>
 // CHECK:               %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>) -> !fir.box<none>
@@ -928,7 +928,7 @@
 // CHECK:               %[[VAL_34:.*]] = fir.absent !fir.box<none>
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 // CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>
 // CHECK:               %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>) -> !fir.box<none>
@@ -1015,7 +1015,7 @@
 // CHECK:               %[[VAL_34:.*]] = fir.absent !fir.box<none>
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 // CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>
 // CHECK:               %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?xnone>>>) -> !fir.box<none>
@@ -1103,7 +1103,7 @@
 // CHECK:               %[[VAL_34:.*]] = fir.absent !fir.box<none>
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK:               %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 // CHECK:               %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>
 // CHECK:               %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?xnone>>>) -> !fir.box<none>