[AMDGPU] Correctly merge noalias scopes during lowering of LDS data. (#131664)
Currently, if there is already noalias metadata present on loads and
stores, lower module lds pass is generating a more conservative aliasing
set. This results in inhibiting scheduling intrinsics that would have
otherwise generated a better pipelined instruction.
The fix is not to always intersect already existing noalias metadata
with noalias created for lowering of LDS. But to intersect only if
noalias scopes are from the same domain, otherwise concatenate exising
noalias sets with LDS noalias.
There a few patches that have come for scopedAA in the past. Following
three should be enough background information.
https://reviews.llvm.org/D91576
https://reviews.llvm.org/D108315
https://reviews.llvm.org/D110049
Essentially, after a pass that might change aliasing info, one should
check if that pass results in change number of MayAlias or ModRef using
the following:
`opt -S -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval
-evaluate-aa-metadata -print-all-alias-modref-info -disable-output`
diff --git a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
index f6ade7c..96afe3c 100644
--- a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -43,6 +43,9 @@
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
AAQueryInfo &AAQI);
+ void collectScopedDomains(const MDNode *NoAlias,
+ SmallPtrSetImpl<const MDNode *> &Domains) const;
+
private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
};
diff --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index a951ecb..4d6c0cc 100644
--- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -114,6 +114,18 @@
Nodes.insert(MD);
}
+/// Collect the set of scoped domains relevant to the noalias scopes.
+void ScopedNoAliasAAResult::collectScopedDomains(
+ const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
+ if (!NoAlias)
+ return;
+ assert(Domains.empty() && "Domains should be empty");
+ for (const MDOperand &MDOp : NoAlias->operands())
+ if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+ if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+ Domains.insert(Domain);
+}
+
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
const MDNode *NoAlias) const {
if (!Scopes || !NoAlias)
@@ -121,10 +133,7 @@
// Collect the set of scope domains relevant to the noalias scopes.
SmallPtrSet<const MDNode *, 16> Domains;
- for (const MDOperand &MDOp : NoAlias->operands())
- if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
- if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
- Domains.insert(Domain);
+ collectScopedDomains(NoAlias, Domains);
// We alias unless, for some domain, the set of noalias scopes in that domain
// is a superset of the set of alias scopes in that domain.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index f9f2d43..88acfe1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -186,6 +186,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -1441,6 +1442,8 @@
if (!MaxDepth || (A == 1 && !AliasScope))
return;
+ ScopedNoAliasAAResult ScopedNoAlias;
+
for (User *U : Ptr->users()) {
if (auto *I = dyn_cast<Instruction>(U)) {
if (AliasScope && I->mayReadOrWriteMemory()) {
@@ -1450,7 +1453,34 @@
I->setMetadata(LLVMContext::MD_alias_scope, AS);
MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
- NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
+
+ // Scoped aliases can originate from two different domains.
+ // First domain would be from LDS domain (created by this pass).
+ // All entries (LDS vars) into LDS struct will have same domain.
+
+ // Second domain could be existing scoped aliases that are the
+ // results of noalias params and subsequent optimizations that
+ // may alter thesse sets.
+
+ // We need to be careful how we create new alias sets, and
+ // have right scopes and domains for loads/stores of these new
+ // LDS variables. We intersect NoAlias set if alias sets belong
+ // to the same domain. This is the case if we have memcpy using
+ // LDS variables. Both src and dst of memcpy would belong to
+ // LDS struct, they donot alias.
+ // On the other hand, if one of the domains is LDS and other is
+ // existing domain prior to LDS, we need to have a union of all
+ // these aliases set to preserve existing aliasing information.
+
+ SmallPtrSet<const MDNode *, 16> ExistingDomains, LDSDomains;
+ ScopedNoAlias.collectScopedDomains(NA, ExistingDomains);
+ ScopedNoAlias.collectScopedDomains(NoAlias, LDSDomains);
+ auto Intersection = set_intersection(ExistingDomains, LDSDomains);
+ if (Intersection.empty()) {
+ NA = NA ? MDNode::concatenate(NA, NoAlias) : NoAlias;
+ } else {
+ NA = NA ? MDNode::intersect(NA, NoAlias) : NoAlias;
+ }
I->setMetadata(LLVMContext::MD_noalias, NA);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
index eefa0b2..92d0a05 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
@@ -84,7 +84,7 @@
define void @f0() {
; CHECK-LABEL: define void @f0()
; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
-; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
+; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29
; CHECK-NEXT: ret void
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
index bb09d3a..154c798 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
@@ -12,9 +12,9 @@
; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6
-; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !6
+; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]]
-; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !6
+; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]]
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4
; CHECK-NEXT: ret void
@@ -48,4 +48,11 @@
; CHECK:!3 = !{!"int", !4, i64 0}
; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
; CHECK:!5 = !{!"Simple C++ TBAA"}
-; CHECK:!6 = !{}
+; CHECK:!6 = !{!7, !9}
+; CHECK:!7 = distinct !{!7, !8}
+; CHECK:!8 = distinct !{!8}
+; CHECK:!9 = distinct !{!9, !10}
+; CHECK:!10 = distinct !{!10}
+; CHECK:!11 = !{!12, !13}
+; CHECK:!12 = distinct !{!12, !8}
+; CHECK:!13 = distinct !{!13, !10}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll
new file mode 100644
index 0000000..d8d7fc1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s
+
+@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
+@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
+@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
+
+define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
+; GCN-LABEL: ds_load_stores_aainfo:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
+; GCN-NEXT: v_mov_b32_e32 v0, 1
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshl_b32 s0, s0, 2
+; GCN-NEXT: v_mov_b32_e32 v4, s0
+; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
+; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
+; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
+; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
+; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
+; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; GCN-NEXT: s_endpgm
+bb:
+ %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
+ %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
+
+ %val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !alias.scope !6, !noalias !5
+ %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !alias.scope !6, !noalias !5
+
+ store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
+
+ %val = add i64 %val.a, %val.b
+ store i64 %val, ptr addrspace(1) %arg, align 4
+
+ tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
+ tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
+ tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
+ ret void
+}
+
+ !0 = !{!"omnipotent char", !1, i64 0}
+ !1 = !{!1}
+ !2 = !{!3}
+ !3 = distinct !{!3, !4}
+ !4 = distinct !{!4}
+ !5 = !{!3}
+ !6 = !{!7}
+ !7 = !{!7, !4}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-with-noalias.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-with-noalias.ll
new file mode 100644
index 0000000..0d0daea
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-with-noalias.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
+
+@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
+@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
+@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
+
+define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
+; GCN-LABEL: ds_load_stores_aainfo:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
+; GCN-NEXT: v_mov_b32_e32 v0, 1
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshl_b32 s0, s0, 2
+; GCN-NEXT: v_mov_b32_e32 v4, s0
+; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
+; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
+; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
+; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
+; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
+; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
+; GCN-NEXT: s_endpgm
+; CHECK-LABEL: define amdgpu_kernel void @ds_load_stores_aainfo(
+; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 [[I]]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 [[I]]
+; CHECK-NEXT: [[VAL_A:%.*]] = load i64, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1:![0-9]+]], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: [[VAL_B:%.*]] = load i64, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
+; CHECK-NEXT: store i64 1, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa [[TBAA1]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = add i64 [[VAL_A]], [[VAL_B]]
+; CHECK-NEXT: store i64 [[VAL]], ptr addrspace(1) [[ARG]], align 4
+; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
+; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
+; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
+; CHECK-NEXT: ret void
+;
+bb:
+ %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
+ %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
+
+ %val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
+ %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
+
+ store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
+
+ %val = add i64 %val.a, %val.b
+ store i64 %val, ptr addrspace(1) %arg, align 4
+
+ tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
+ tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
+ tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
+ ret void
+}
+
+ !0 = !{!"omnipotent char", !1, i64 0}
+ !1 = !{!1}
+ !2 = !{!3}
+ !3 = distinct !{!3, !4}
+ !4 = distinct !{!4}
+ !5 = !{!3}
+;.
+; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0, i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = distinct !{[[META3]]}
+; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
+; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
+; CHECK: [[META6]] = distinct !{[[META6]]}
+; CHECK: [[META7]] = !{[[META8:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]}
+; CHECK: [[META9]] = distinct !{[[META9]]}
+; CHECK: [[META10]] = distinct !{[[META10]], [[META6]]}
+; CHECK: [[META11]] = distinct !{[[META11]], [[META6]]}
+; CHECK: [[META12]] = !{[[META10]]}
+; CHECK: [[META13]] = !{[[META8]], [[META5]], [[META11]]}
+; CHECK: [[META14]] = !{[[META11]]}
+; CHECK: [[META15]] = !{[[META8]], [[META5]], [[META10]]}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
index 96e8099..e7f78b4 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
@@ -60,7 +60,7 @@
define amdgpu_kernel void @k_f0() {
; MODULE-LABEL: @k_f0(
-; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
+; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META1]]
; MODULE-NEXT: call void @f0()
; MODULE-NEXT: ret void
;
@@ -83,9 +83,9 @@
@both.lds = addrspace(3) global i32 poison
define void @f_both() {
; MODULE-LABEL: @f_both(
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11:![0-9]+]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f_both(
@@ -116,9 +116,9 @@
define amdgpu_kernel void @k0_both() {
; MODULE-LABEL: @k0_both(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
; MODULE-NEXT: call void @f_both()
; MODULE-NEXT: ret void
;