[Polly][Codegen] Emit access group metadata.

Emit llvm.access.group and llvm.loop.parallel_accesses metadata instead
of llvm.mem.parallel_loop_access. The latter is deprecated because it
assumes that LoopIDs are persistent, which they are not.
We also emit parallel access metadata for all surrounding parallel
loops, not just the innermost parallel loop.

GitOrigin-RevId: b85c98b4c5734b8570e1392fb15d2f9fc2581d6e
diff --git a/include/polly/CodeGen/IRBuilder.h b/include/polly/CodeGen/IRBuilder.h
index fb5d14a..94b8643 100644
--- a/include/polly/CodeGen/IRBuilder.h
+++ b/include/polly/CodeGen/IRBuilder.h
@@ -100,7 +100,7 @@
   /// All loops currently under construction.
   llvm::SmallVector<llvm::Loop *, 8> ActiveLoops;
 
-  /// Metadata pointing to parallel loops currently under construction.
+  /// Access groups for the parallel loops currently under construction.
   llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops;
 
   /// The alias scope domain for the current SCoP.
diff --git a/lib/CodeGen/IRBuilder.cpp b/lib/CodeGen/IRBuilder.cpp
index 001a907..374f2ea 100644
--- a/lib/CodeGen/IRBuilder.cpp
+++ b/lib/CodeGen/IRBuilder.cpp
@@ -94,51 +94,52 @@
 }
 
 void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) {
-
   ActiveLoops.push_back(L);
-  if (!IsParallel)
-    return;
 
-  BasicBlock *Header = L->getHeader();
-  MDNode *Id = getID(Header->getContext());
-  assert(Id->getOperand(0) == Id && "Expected Id to be a self-reference");
-  assert(Id->getNumOperands() == 1 && "Unexpected extra operands in Id");
-  MDNode *Ids = ParallelLoops.empty()
-                    ? Id
-                    : MDNode::concatenate(ParallelLoops.back(), Id);
-  ParallelLoops.push_back(Ids);
+  if (IsParallel) {
+    LLVMContext &Ctx = SE->getContext();
+    MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
+    ParallelLoops.push_back(AccessGroup);
+  }
 }
 
 void ScopAnnotator::popLoop(bool IsParallel) {
   ActiveLoops.pop_back();
-  if (!IsParallel)
-    return;
 
-  assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
-  ParallelLoops.pop_back();
+  if (IsParallel) {
+    assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
+    ParallelLoops.pop_back();
+  }
 }
 
 void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
                                       bool IsLoopVectorizerDisabled) const {
-  MDNode *MData = nullptr;
+  LLVMContext &Ctx = SE->getContext();
+  SmallVector<Metadata *, 3> Args;
+
+  // For the LoopID self-reference.
+  Args.push_back(nullptr);
 
   if (IsLoopVectorizerDisabled) {
-    SmallVector<Metadata *, 3> Args;
-    LLVMContext &Ctx = SE->getContext();
-    Args.push_back(MDString::get(Ctx, "llvm.loop.vectorize.enable"));
-    auto *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
-    Args.push_back(ValueAsMetadata::get(FalseValue));
-    MData = MDNode::concatenate(MData, getID(Ctx, MDNode::get(Ctx, Args)));
+    MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
+    ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
+    ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
+    Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
   }
 
   if (IsParallel) {
-    assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
-    MDNode *Ids = ParallelLoops.back();
-    MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1));
-    MData = MDNode::concatenate(MData, Id);
+    MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
+    MDNode *AccGroup = ParallelLoops.back();
+    Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
   }
 
-  B->setMetadata("llvm.loop", MData);
+  // No metadata to annotate.
+  if (Args.size() <= 1)
+    return;
+
+  MDNode *MData = MDNode::getDistinct(Ctx, Args);
+  MData->replaceOperandWith(0, MData);
+  B->setMetadata(LLVMContext::MD_loop, MData);
 }
 
 /// Get the pointer operand
@@ -214,8 +215,24 @@
   if (!Inst->mayReadOrWriteMemory())
     return;
 
-  if (!ParallelLoops.empty())
-    Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back());
+  switch (ParallelLoops.size()) {
+  case 0:
+    // Not parallel to anything: no access group needed.
+    break;
+  case 1:
+    // Single parallel loop: use directly.
+    Inst->setMetadata(LLVMContext::MD_access_group,
+                      cast<MDNode>(ParallelLoops.front()));
+    break;
+  default:
+    // Parallel to multiple loops: refer to list of access groups.
+    Inst->setMetadata(LLVMContext::MD_access_group,
+                      MDNode::get(SE->getContext(),
+                                  ArrayRef<Metadata *>(
+                                      (Metadata *const *)ParallelLoops.data(),
+                                      ParallelLoops.size())));
+    break;
+  }
 
   // TODO: Use the ScopArrayInfo once available here.
   if (!AliasScopeDomain)
diff --git a/test/CodeGen/stride_detection.ll b/test/CodeGen/stride_detection.ll
index da0abb0..0bbaaa3 100644
--- a/test/CodeGen/stride_detection.ll
+++ b/test/CodeGen/stride_detection.ll
@@ -10,13 +10,13 @@
 ;             Stmt_for_body_3(32 * c0 + 4 * c2 + c4, 32 * c1 + c3);
 
 ; CHECK: polly.stmt.for.body.3:                            ; preds = %polly.loop_header18
-; CHECK:   %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !1, !noalias !3, !llvm.mem.parallel_loop_access !0
-; CHECK:   %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+; CHECK:   %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !3, !noalias !5, !llvm.access.group !2
+; CHECK:   %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !6, !noalias !7, !llvm.access.group !2
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 0
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 1
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 2
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 3
-; CHECK:   store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+; CHECK:   store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !6, !noalias !7, !llvm.access.group !2
 
 define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %C, [1024 x double]* %A) #0 {
 entry:
diff --git a/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll b/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
index 49798ba..617aaa7 100644
--- a/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
+++ b/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
@@ -8,11 +8,17 @@
 ; CHECK-DAG:  %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 511
 ; CHECK-DAG:  br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]]
 ;
-; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4
+; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPLST6:[0-9]+]]
 ;
-; CHECK-DAG: ![[IDOuter]] = distinct !{![[IDOuter]]}
-; CHECK-DAG: ![[IDInner]] = distinct !{![[IDInner]]}
-; CHECK-DAG: !4 = !{![[IDOuter]], ![[IDInner]]}
+; CHECK-DAG: ![[IDOuter]] = distinct !{![[IDOuter]], ![[ACCGROUP1:[0-9]+]]}
+; CHECK-DAG: ![[ACCGROUP1]] = !{!"llvm.loop.parallel_accesses", ![[GROUP2:[0-9]+]]}
+; CHECK-DAG: ![[GROUP2]] = distinct !{}
+; CHECK-DAG: ![[GROUPLST6]] = !{![[GROUP2]], ![[GROUP7:[0-9]+]]}
+; CHECK-DAG: ![[GROUP7]] = distinct !{}
+; CHECK-DAG: ![[IDInner]] = distinct !{![[IDInner]], ![[ACCGROUP9:[0-9]+]]}
+; CHECK-DAG: ![[ACCGROUP9]] = !{!"llvm.loop.parallel_accesses", ![[GROUP7]]}
+
+;
 ;
 ;    void jd(int *A) {
 ;      for (int i = 0; i < 1024; i++)
diff --git a/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll b/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
index 7555d84..ce96eef 100644
--- a/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
+++ b/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
@@ -33,13 +33,15 @@
   ret void
 }
 
-; SEQUENTIAL: @test-one
+; SEQUENTIAL-LABEL: @test-one
 ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
+; SEQUENTIAL-NOT: !llvm.access.group
 ; SEQUENTIAL-NOT: !llvm.loop
 
 ; PARALLEL: @test-one
-; PARALLEL: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
-; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
+; PARALLEL: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID3:[0-9]+]]
+; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID4:[0-9]+]]
+
 
 ; This loop has memory dependences that require at least a simple dependence
 ; analysis to detect the parallelism.
@@ -76,11 +78,18 @@
   ret void
 }
 
-; SEQUENTIAL: @test-two
+; SEQUENTIAL-LABEL: @test-two
 ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
+; SEQUENTIAL-NOT: !llvm.access.group
 ; SEQUENTIAL-NOT: !llvm.loop
 
 ; PARALLEL: @test-two
-; PARALLEL: %val_p_scalar_ = load i32, i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
-; PARALLEL: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]]
-; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
+; PARALLEL: %val_p_scalar_ = load i32, i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID8:[0-9]*]]
+; PARALLEL: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID8]]
+; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID9:[0-9]*]]
+
+
+; PARALLEL: ![[LoopID4]] = distinct !{![[LoopID4]], ![[PARACC5:[0-9]+]]}
+; PARALLEL: ![[PARACC5]] = !{!"llvm.loop.parallel_accesses", ![[GROUPID3]]}
+; PARALLEL: ![[LoopID9]] = distinct !{![[LoopID9]], ![[PARACC10:[0-9]+]]}
+; PARALLEL: ![[PARACC10]] = !{!"llvm.loop.parallel_accesses", ![[GROUPID8]]}
diff --git a/test/Isl/CodeGen/OpenMP/new_multidim_access.ll b/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
index 02dce82..b28a914 100644
--- a/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
+++ b/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
@@ -23,13 +23,13 @@
 ; IR: %6 = add nsw i64 %polly.indvar5, 13
 ; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6
 ; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A
-; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 
 ; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m
 ; IR: %7 = add nsw i64 %polly.indvar5, 43
 ; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7
 ; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9
-; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
+; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @new_multidim_access(i64 %n, i64 %m, float* %A) {
diff --git a/test/Isl/CodeGen/getNumberOfIterations.ll b/test/Isl/CodeGen/getNumberOfIterations.ll
index dc7ad9d..a1eec2d 100644
--- a/test/Isl/CodeGen/getNumberOfIterations.ll
+++ b/test/Isl/CodeGen/getNumberOfIterations.ll
@@ -8,9 +8,9 @@
 ; CHECK: polly.stmt.if.then:                               ; preds = %polly.loop_header
 ; CHECK:   %p_conv = sitofp i64 %polly.indvar to float
 ; CHECK:   %scevgep = getelementptr float, float* %A, i64 %polly.indvar
-; CHECK:   %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; CHECK:   %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 ; CHECK:   %p_add = fadd float %p_conv, %_p_scalar_
-; CHECK:   store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; CHECK:   store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 
 define void @foo(float* %A, i64 %N) #0 {
 entry: