[AMDGPU] Flag new raw/struct atomic ops as source of divergence

Differential Revision: https://reviews.llvm.org/D60731

Change-Id: I821d93dec8b9cdd247b8172d92fb5e15340a9e7d

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358579 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 916b1ef..c376d2e 100644
--- a/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -71,6 +71,28 @@
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>;
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
 def : SourceOfDivergence<int_amdgcn_ps_live>;
 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
 def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
index 34ceb35..5bc5fe3 100644
--- a/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -88,6 +88,182 @@
   ret float %r
 }
 
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(
+define float @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(
+define float @raw_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(
+define float @raw_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(
+define float @raw_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(
+define float @raw_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(
+define float @raw_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(
+define float @raw_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(
+define float @raw_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(
+define float @raw_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(
+define float @raw_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(
+define float @raw_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(
+define float @struct_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(
+define float @struct_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(
+define float @struct_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(
+define float @struct_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(
+define float @struct_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(
+define float @struct_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(
+define float @struct_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(
+define float @struct_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(
+define float @struct_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(
+define float @struct_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(
+define float @struct_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
 declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
 declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
 declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
@@ -100,4 +276,28 @@
 declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
 declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
 
+declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0
+
+declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
+
 attributes #0 = { nounwind }