diff --git a/include/polly/CodeGen/IRBuilder.h b/include/polly/CodeGen/IRBuilder.h
index fb5d14a..94b8643 100644
--- a/include/polly/CodeGen/IRBuilder.h
+++ b/include/polly/CodeGen/IRBuilder.h
@@ -100,7 +100,7 @@
   /// All loops currently under construction.
   llvm::SmallVector<llvm::Loop *, 8> ActiveLoops;
 
-  /// Metadata pointing to parallel loops currently under construction.
+  /// Access groups for the parallel loops currently under construction.
   llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops;
 
   /// The alias scope domain for the current SCoP.
diff --git a/lib/CodeGen/IRBuilder.cpp b/lib/CodeGen/IRBuilder.cpp
index 001a907..374f2ea 100644
--- a/lib/CodeGen/IRBuilder.cpp
+++ b/lib/CodeGen/IRBuilder.cpp
@@ -94,51 +94,52 @@
 }
 
 void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) {
-
   ActiveLoops.push_back(L);
-  if (!IsParallel)
-    return;
 
-  BasicBlock *Header = L->getHeader();
-  MDNode *Id = getID(Header->getContext());
-  assert(Id->getOperand(0) == Id && "Expected Id to be a self-reference");
-  assert(Id->getNumOperands() == 1 && "Unexpected extra operands in Id");
-  MDNode *Ids = ParallelLoops.empty()
-                    ? Id
-                    : MDNode::concatenate(ParallelLoops.back(), Id);
-  ParallelLoops.push_back(Ids);
+  if (IsParallel) {
+    LLVMContext &Ctx = SE->getContext();
+    MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
+    ParallelLoops.push_back(AccessGroup);
+  }
 }
 
 void ScopAnnotator::popLoop(bool IsParallel) {
   ActiveLoops.pop_back();
-  if (!IsParallel)
-    return;
 
-  assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
-  ParallelLoops.pop_back();
+  if (IsParallel) {
+    assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
+    ParallelLoops.pop_back();
+  }
 }
 
 void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
                                       bool IsLoopVectorizerDisabled) const {
-  MDNode *MData = nullptr;
+  LLVMContext &Ctx = SE->getContext();
+  SmallVector<Metadata *, 3> Args;
+
+  // For the LoopID self-reference.
+  Args.push_back(nullptr);
 
   if (IsLoopVectorizerDisabled) {
-    SmallVector<Metadata *, 3> Args;
-    LLVMContext &Ctx = SE->getContext();
-    Args.push_back(MDString::get(Ctx, "llvm.loop.vectorize.enable"));
-    auto *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
-    Args.push_back(ValueAsMetadata::get(FalseValue));
-    MData = MDNode::concatenate(MData, getID(Ctx, MDNode::get(Ctx, Args)));
+    MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
+    ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
+    ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
+    Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
   }
 
   if (IsParallel) {
-    assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
-    MDNode *Ids = ParallelLoops.back();
-    MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1));
-    MData = MDNode::concatenate(MData, Id);
+    MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
+    MDNode *AccGroup = ParallelLoops.back();
+    Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
   }
 
-  B->setMetadata("llvm.loop", MData);
+  // No metadata to annotate.
+  if (Args.size() <= 1)
+    return;
+
+  MDNode *MData = MDNode::getDistinct(Ctx, Args);
+  MData->replaceOperandWith(0, MData);
+  B->setMetadata(LLVMContext::MD_loop, MData);
 }
 
 /// Get the pointer operand
@@ -214,8 +215,24 @@
   if (!Inst->mayReadOrWriteMemory())
     return;
 
-  if (!ParallelLoops.empty())
-    Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back());
+  switch (ParallelLoops.size()) {
+  case 0:
+    // Not parallel to anything: no access group needed.
+    break;
+  case 1:
+    // Single parallel loop: use directly.
+    Inst->setMetadata(LLVMContext::MD_access_group,
+                      cast<MDNode>(ParallelLoops.front()));
+    break;
+  default:
+    // Parallel to multiple loops: refer to list of access groups.
+    Inst->setMetadata(LLVMContext::MD_access_group,
+                      MDNode::get(SE->getContext(),
+                                  ArrayRef<Metadata *>(
+                                      (Metadata *const *)ParallelLoops.data(),
+                                      ParallelLoops.size())));
+    break;
+  }
 
   // TODO: Use the ScopArrayInfo once available here.
   if (!AliasScopeDomain)
diff --git a/test/CodeGen/stride_detection.ll b/test/CodeGen/stride_detection.ll
index da0abb0..0bbaaa3 100644
--- a/test/CodeGen/stride_detection.ll
+++ b/test/CodeGen/stride_detection.ll
@@ -10,13 +10,13 @@
 ;             Stmt_for_body_3(32 * c0 + 4 * c2 + c4, 32 * c1 + c3);
 
 ; CHECK: polly.stmt.for.body.3:                            ; preds = %polly.loop_header18
-; CHECK:   %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !1, !noalias !3, !llvm.mem.parallel_loop_access !0
-; CHECK:   %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+; CHECK:   %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !3, !noalias !5, !llvm.access.group !2
+; CHECK:   %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !6, !noalias !7, !llvm.access.group !2
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 0
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 1
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 2
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 3
-; CHECK:   store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+; CHECK:   store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !6, !noalias !7, !llvm.access.group !2
 
 define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %C, [1024 x double]* %A) #0 {
 entry:
diff --git a/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll b/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
index 49798ba..617aaa7 100644
--- a/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
+++ b/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
@@ -8,11 +8,17 @@
 ; CHECK-DAG:  %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 511
 ; CHECK-DAG:  br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]]
 ;
-; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4
+; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPLST6:[0-9]+]]
 ;
-; CHECK-DAG: ![[IDOuter]] = distinct !{![[IDOuter]]}
-; CHECK-DAG: ![[IDInner]] = distinct !{![[IDInner]]}
-; CHECK-DAG: !4 = !{![[IDOuter]], ![[IDInner]]}
+; CHECK-DAG: ![[IDOuter]] = distinct !{![[IDOuter]], ![[ACCGROUP1:[0-9]+]]}
+; CHECK-DAG: ![[ACCGROUP1]] = !{!"llvm.loop.parallel_accesses", ![[GROUP2:[0-9]+]]}
+; CHECK-DAG: ![[GROUP2]] = distinct !{}
+; CHECK-DAG: ![[GROUPLST6]] = !{![[GROUP2]], ![[GROUP7:[0-9]+]]}
+; CHECK-DAG: ![[GROUP7]] = distinct !{}
+; CHECK-DAG: ![[IDInner]] = distinct !{![[IDInner]], ![[ACCGROUP9:[0-9]+]]}
+; CHECK-DAG: ![[ACCGROUP9]] = !{!"llvm.loop.parallel_accesses", ![[GROUP7]]}
+
+;
 ;
 ;    void jd(int *A) {
 ;      for (int i = 0; i < 1024; i++)
diff --git a/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll b/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
index 7555d84..ce96eef 100644
--- a/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
+++ b/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
@@ -33,13 +33,15 @@
   ret void
 }
 
-; SEQUENTIAL: @test-one
+; SEQUENTIAL-LABEL: @test-one
 ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
+; SEQUENTIAL-NOT: !llvm.access.group
 ; SEQUENTIAL-NOT: !llvm.loop
 
 ; PARALLEL: @test-one
-; PARALLEL: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
-; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
+; PARALLEL: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID3:[0-9]+]]
+; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID4:[0-9]+]]
+
 
 ; This loop has memory dependences that require at least a simple dependence
 ; analysis to detect the parallelism.
@@ -76,11 +78,18 @@
   ret void
 }
 
-; SEQUENTIAL: @test-two
+; SEQUENTIAL-LABEL: @test-two
 ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
+; SEQUENTIAL-NOT: !llvm.access.group
 ; SEQUENTIAL-NOT: !llvm.loop
 
 ; PARALLEL: @test-two
-; PARALLEL: %val_p_scalar_ = load i32, i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
-; PARALLEL: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]]
-; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
+; PARALLEL: %val_p_scalar_ = load i32, i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID8:[0-9]*]]
+; PARALLEL: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID8]]
+; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID9:[0-9]*]]
+
+
+; PARALLEL: ![[LoopID4]] = distinct !{![[LoopID4]], ![[PARACC5:[0-9]+]]}
+; PARALLEL: ![[PARACC5]] = !{!"llvm.loop.parallel_accesses", ![[GROUPID3]]}
+; PARALLEL: ![[LoopID9]] = distinct !{![[LoopID9]], ![[PARACC10:[0-9]+]]}
+; PARALLEL: ![[PARACC10]] = !{!"llvm.loop.parallel_accesses", ![[GROUPID8]]}
diff --git a/test/Isl/CodeGen/OpenMP/new_multidim_access.ll b/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
index 02dce82..b28a914 100644
--- a/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
+++ b/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
@@ -23,13 +23,13 @@
 ; IR: %6 = add nsw i64 %polly.indvar5, 13
 ; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6
 ; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A
-; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 
 ; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m
 ; IR: %7 = add nsw i64 %polly.indvar5, 43
 ; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7
 ; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9
-; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
+; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @new_multidim_access(i64 %n, i64 %m, float* %A) {
diff --git a/test/Isl/CodeGen/getNumberOfIterations.ll b/test/Isl/CodeGen/getNumberOfIterations.ll
index dc7ad9d..a1eec2d 100644
--- a/test/Isl/CodeGen/getNumberOfIterations.ll
+++ b/test/Isl/CodeGen/getNumberOfIterations.ll
@@ -8,9 +8,9 @@
 ; CHECK: polly.stmt.if.then:                               ; preds = %polly.loop_header
 ; CHECK:   %p_conv = sitofp i64 %polly.indvar to float
 ; CHECK:   %scevgep = getelementptr float, float* %A, i64 %polly.indvar
-; CHECK:   %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; CHECK:   %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 ; CHECK:   %p_add = fadd float %p_conv, %_p_scalar_
-; CHECK:   store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; CHECK:   store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 
 define void @foo(float* %A, i64 %N) #0 {
 entry:
