AMDGPU/SI: Assembler: Unify parsing/printing of operands.

Summary:
The goal is for each operand type to have its own parse function while
sharing common code for tracking state, since different instruction
types share operand types (e.g. glc/glc_flat).

Introduce parseAMDGPUOperand, which can parse any optional operand.
DPP and Clamp/OMod keep custom handling for now. Sam also suggested
using a class hierarchy for operand types instead of a table; this
can be done in a separate change.
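
For illustration only, here is a simplified, self-contained C++ sketch of
the table-driven approach. It is not the actual AMDGPUAsmParser code; the
OptionalOperand struct, the OptionalOps table and parseOptionalOperand are
hypothetical names used to model the idea of one parse routine walking a
shared table of optional operand descriptors:

  #include <cstdint>
  #include <optional>
  #include <string>
  #include <vector>

  // Hypothetical descriptor for one optional operand (e.g. "glc", "offset:N").
  struct OptionalOperand {
    std::string Name;  // token introducing the operand
    bool HasValue;     // true for "name:value" forms such as offset:16
    int64_t Default;   // value assumed when the operand is omitted
  };

  // One shared table: the same entries serve MUBUF, DS, FLAT, ... formats.
  static const std::vector<OptionalOperand> OptionalOps = {
      {"offset", true, 0}, {"glc", false, 0}, {"slc", false, 0}, {"tfe", false, 0},
  };

  // A single parse routine for any optional operand, standing in for the
  // per-format parseMubufOptionalOps / parseDS*OptionalOps helpers it replaces.
  std::optional<int64_t> parseOptionalOperand(const std::string &Tok) {
    for (const OptionalOperand &Op : OptionalOps) {
      if (Tok.compare(0, Op.Name.size(), Op.Name) != 0)
        continue;
      if (!Op.HasValue)
        return 1;  // bare flag present, e.g. "glc"
      if (Tok.size() > Op.Name.size() && Tok[Op.Name.size()] == ':')
        return std::stoll(Tok.substr(Op.Name.size() + 1));  // e.g. "offset:16"
      return Op.Default;
    }
    return std::nullopt;  // not a known optional operand
  }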

Remove parseVOP3OptionalOps, parseDS*OptionalOps, parseFlatOptionalOps,
parseMubufOptionalOps, parseDPPOptionalOps.
Reduce the number of AsmOperand and MatchClass definitions by using a common base class.
Rename AsmMatcher/InstPrinter methods accordingly.
Print the immediate type when printing a parsed immediate operand.
Use 'off' when the offset/index register is unused instead of omitting it, which makes the output more readable (also agreed with SP3); a minimal printing sketch follows below.
Update tests.
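
As a small illustration of the 'off' change (again a standalone sketch, not
the actual InstPrinter code; printVAddr and the -1 sentinel are hypothetical):
when a MUBUF instruction uses no offset/index VGPR, the printer emits a
literal 'off' placeholder instead of dropping the operand, so the plain,
offen and idxen forms keep the same operand layout:

  #include <cstdio>

  // Hypothetical minimal model of printing the MUBUF vaddr slot; -1 stands
  // for "no offset/index register in use".
  void printVAddr(int VAddrReg) {
    if (VAddrReg < 0)
      std::printf("off");           // buffer_store_dword v0, off, s[0:3], 0
    else
      std::printf("v%d", VAddrReg); // buffer_store_dword v0, v2, s[0:3], 0 offen
  }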

Reviewers: tstellarAMD, SamWot, artem.tamazov

Subscribers: qcolombet, arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D19584

llvm-svn: 268015
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 6cf1967..3a0e8f3 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -123,10 +123,10 @@
 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
 
 ; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
-; GCN: buffer_store_dword [[FI1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 
 ; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
-; GCN: buffer_store_dword [[FI2]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
   %tmp0 = alloca float
   %tmp1 = alloca float
@@ -150,7 +150,7 @@
 
 ; GCN-DAG: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
 ; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
-; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
   %tmp0 = alloca [4096 x i32]
   %tmp1 = alloca [4096 x i32]
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index 1c5bed3b..82f88a0 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -42,7 +42,7 @@
 ; OPT: br label
 
 ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
-; CI: buffer_load_dword {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
+; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
 define void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
 entry:
   %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index f2b83fb..ae419a6 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -40,7 +40,7 @@
 
 ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
 ; GCN: {{^}}BB1_2:
 ; GCN: s_or_b64 exec
 define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
@@ -67,7 +67,7 @@
 
 ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
+; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
 ; GCN: {{^}}BB2_2:
 ; GCN: s_or_b64 exec
 define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
@@ -94,7 +94,7 @@
 
 ; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
 ; GCN: {{^}}BB3_2:
 ; GCN: s_or_b64 exec
 define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop.ll b/llvm/test/CodeGen/AMDGPU/ctpop.ll
index 839b893..32b071d 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop.ll
@@ -250,8 +250,8 @@
 }
 
 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
-; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
-; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
+; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 ; GCN: buffer_store_dword [[RESULT]],
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
index 9fe3755..3343b68 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
@@ -4,8 +4,8 @@
 
 
 ; COMMON-LABEL: {{^}}fdiv_f64:
-; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0
-; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], off, {{s\[[0-9]+:[0-9]+\]}}, 0
+; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8
 ; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]]
 ; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]]
 
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll b/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
index 6d2e273..9bbfe1e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
@@ -4,9 +4,9 @@
 declare double @llvm.maxnum.f64(double, double) nounwind readnone
 
 ; SI-LABEL: {{^}}test_fmax3_f64:
-; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
-; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
-; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
 ; SI: v_max_f64 [[REGA]], [[REGA]], [[REGB]]
 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[REGA]], [[REGC]]
 ; SI: buffer_store_dwordx2 [[RESULT]],
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
index 752ded8..30b197a 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
@@ -3,7 +3,7 @@
 
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_offset:
-; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -12,7 +12,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
-; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -48,7 +48,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32:
-; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -56,7 +56,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_ret:
-; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -88,7 +88,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_offset:
-; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -97,7 +97,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
-; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -132,7 +132,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32:
-; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -140,7 +140,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_ret:
-; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -172,7 +172,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
-; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -181,7 +181,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
-; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -216,7 +216,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32:
-; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -224,7 +224,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
-; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -256,7 +256,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_offset:
-; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -265,7 +265,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
-; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -300,7 +300,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32:
-; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -308,7 +308,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_ret:
-; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -340,7 +340,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
-; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -349,7 +349,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
-; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -384,7 +384,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32:
-; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -392,7 +392,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
-; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -424,7 +424,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_offset:
-; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -433,7 +433,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
-; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -468,7 +468,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32:
-; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -476,7 +476,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_ret:
-; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -508,7 +508,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
-; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -517,7 +517,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
-; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -552,7 +552,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32:
-; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -560,7 +560,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
-; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -592,7 +592,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_offset:
-; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -601,7 +601,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
-; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -636,7 +636,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32:
-; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -644,7 +644,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_ret:
-; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -676,7 +676,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
-; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -685,7 +685,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
-; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -719,7 +719,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32:
-; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -727,7 +727,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
-; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -761,7 +761,7 @@
 ; CMP_SWAP
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
-; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -770,7 +770,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
-; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword v[[RET]]
 define void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
 entry:
@@ -806,7 +806,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
-; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
 entry:
   %0  = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
@@ -814,7 +814,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret:
-; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword v[[RET]]
 define void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
 entry:
@@ -848,7 +848,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
-; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -857,7 +857,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
-; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -892,7 +892,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32:
-; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -900,7 +900,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
-; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -933,7 +933,7 @@
 
 ; ATOMIC_LOAD
 ; FUNC-LABEL: {{^}}atomic_load_i32_offset:
-; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
@@ -945,7 +945,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i32:
-; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
@@ -981,7 +981,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i64_offset:
-; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
@@ -993,7 +993,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i64:
-; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
@@ -1030,7 +1030,7 @@
 
 ; ATOMIC_STORE
 ; FUNC-LABEL: {{^}}atomic_store_i32_offset:
-; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
 define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
 entry:
@@ -1040,7 +1040,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32:
-; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
 define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
 entry:
@@ -1070,7 +1070,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64_offset:
-; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 define void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
 entry:
@@ -1080,7 +1080,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64:
-; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc
 define void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
index 3eabec4..bc37825 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -3,7 +3,7 @@
 
 
 ; GCN-LABEL: {{^}}atomic_add_i64_offset:
-; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -12,7 +12,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
-; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -47,7 +47,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_add_i64:
-; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
@@ -55,7 +55,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_add_i64_ret:
-; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -87,7 +87,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64_offset:
-; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -96,7 +96,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
-; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -131,7 +131,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64:
-; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
@@ -139,7 +139,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64_ret:
-; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -171,7 +171,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64_offset:
-; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -180,7 +180,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
-; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -215,7 +215,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64:
-; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
@@ -223,7 +223,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64_ret:
-; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -255,7 +255,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64_offset:
-; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -264,7 +264,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
-; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -299,7 +299,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64:
-; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
@@ -307,7 +307,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64_ret:
-; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -339,7 +339,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64_offset:
-; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_umax_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -348,7 +348,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
-; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -383,7 +383,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64:
-; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
@@ -391,7 +391,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64_ret:
-; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -423,7 +423,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64_offset:
-; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -432,7 +432,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
-; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -467,7 +467,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64:
-; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
@@ -475,7 +475,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64_ret:
-; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -507,7 +507,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64_offset:
-; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -516,7 +516,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
-; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -551,7 +551,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64:
-; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
@@ -559,7 +559,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64_ret:
-; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -591,7 +591,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64_offset:
-; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -600,7 +600,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
-; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -635,7 +635,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64:
-; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_or_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst
@@ -643,7 +643,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64_ret:
-; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_or_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -675,7 +675,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
-; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -684,7 +684,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
-; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -718,7 +718,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64:
-; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xchg_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst
@@ -726,7 +726,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
-; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -758,7 +758,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64_offset:
-; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_xor_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -767,7 +767,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
-; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xor_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -802,7 +802,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64:
-; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xor_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst
@@ -810,7 +810,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64_ret:
-; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xor_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index 409b2ec..af409af 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -13,10 +13,10 @@
 }
 
 ; GCN-LABEL: {{^}}load_v2f16_arg:
-; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
-; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
+; GCN-DAG: buffer_store_short [[V0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_short [[V1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
 ; GCN: s_endpgm
 define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
   store <2 x half> %arg, <2 x half> addrspace(1)* %out
@@ -280,8 +280,8 @@
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
-; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
@@ -378,8 +378,8 @@
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
-; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
@@ -407,8 +407,8 @@
 ; GCN: v_cvt_f64_f32_e32
 ; GCN-NOT: v_cvt_f64_f32_e32
 
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
 ; GCN: s_endpgm
 define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
   %val = load <3 x half>, <3 x half> addrspace(1)* %in
diff --git a/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll b/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
index 2a01a62..a70da5b 100644
--- a/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
@@ -21,7 +21,7 @@
 ; GCN-LABEL: {{^}}test_merge_store_constant_i16_invariant_constant_pointer_load:
 ; GCN: s_load_dwordx2 s{{\[}}[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]{{\]}}
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
-; GCN: buffer_store_dword [[K]], s{{\[}}[[SPTR_LO]]:
+; GCN: buffer_store_dword [[K]], off, s{{\[}}[[SPTR_LO]]:
 define void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(2)* dereferenceable(4096) nonnull %in) #0 {
   %ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(2)* %in, !invariant.load !0
   %ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1
@@ -32,4 +32,4 @@
 
 !0 = !{}
 
-attributes #0 = { nounwind }
\ No newline at end of file
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll b/llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
index 0d9ec45..d0cc00d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
@@ -7,7 +7,7 @@
 ; FIXME: Out of bounds immediate offset crashes
 
 ; CHECK-LABEL: {{^}}main:
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc
+; CHECK: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc
 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc
 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc
 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
index 48f3fba..d17179e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -49,7 +49,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   store i32 %result, i32 addrspace(1)* %out
@@ -58,7 +58,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
 define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -67,7 +67,7 @@
 }
 
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32:
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   ret void
@@ -75,7 +75,7 @@
 
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -166,7 +166,7 @@
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   store i64 %result, i64 addrspace(1)* %out
@@ -176,7 +176,7 @@
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
 define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
 define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
index d79c37c..93aa1fa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
@@ -49,7 +49,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   store i32 %result, i32 addrspace(1)* %out
@@ -58,7 +58,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
 define void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -67,7 +67,7 @@
 }
 
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32:
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   ret void
@@ -75,7 +75,7 @@
 
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 define void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -166,7 +166,7 @@
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   store i64 %result, i64 addrspace(1)* %out
@@ -176,7 +176,7 @@
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
 define void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
 define void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
index 1d84872..f513095 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -2,7 +2,7 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}test1:
-;CHECK: buffer_atomic_swap v0, s[0:3], 0 glc
+;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 idxen glc
 ;CHECK: s_waitcnt vmcnt(0)
@@ -13,9 +13,9 @@
 ;CHECK: buffer_atomic_swap v0, v2, s[0:3], 0 offen offset:42 glc
 ;CHECK-DAG: s_waitcnt vmcnt(0)
 ;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
-;CHECK: buffer_atomic_swap v0, s[0:3], [[SOFS]] offset:1 glc
+;CHECK: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:1 glc
 ;CHECK: s_waitcnt vmcnt(0)
-;CHECK: buffer_atomic_swap v0, s[0:3], 0{{$}}
+;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
 define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
 main_body:
   %o1 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
@@ -68,7 +68,7 @@
 ; create copies which we don't bother to track here.
 ;
 ;CHECK-LABEL: {{^}}test3:
-;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 glc
+;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 idxen glc
 ;CHECK: s_waitcnt vmcnt(0)
@@ -79,7 +79,7 @@
 ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen offset:42 glc
 ;CHECK-DAG: s_waitcnt vmcnt(0)
 ;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
-;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[SOFS]] offset:1 glc
+;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:1 glc
 define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
 main_body:
   %o1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
index d8ec770..9fe7161 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_load:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0
-;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc
-;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc
+;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0
+;CHECK: buffer_load_format_xyzw v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_load_format_xyzw v[8:11], off, s[0:3], 0 slc
 ;CHECK: s_waitcnt
 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
 main_body:
@@ -18,7 +18,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 offset:42
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
 main_body:
@@ -27,11 +27,11 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
-;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 61 offset:4095
+;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 61 offset:4095
 ;CHECK-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS1]] offset:4093
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093
 ;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
 main_body:
@@ -45,9 +45,9 @@
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse:
 ;CHECK: s_movk_i32 [[OFS:s[0-9]+]], 0xfff
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:65
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:65
 ;CHECK-NOT: s_mov
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:81
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) {
 main_body:
@@ -105,7 +105,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_x:
-;CHECK: buffer_load_format_x v0, s[0:3], 0
+;CHECK: buffer_load_format_x v0, off, s[0:3], 0
 ;CHECK: s_waitcnt
 define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
 main_body:
@@ -114,7 +114,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_xy:
-;CHECK: buffer_load_format_xy v[0:1], s[0:3], 0
+;CHECK: buffer_load_format_xy v[0:1], off, s[0:3], 0
 ;CHECK: s_waitcnt
 define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
index 3d5f69f..a547f00 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_load:
-;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], 0
-;CHECK: buffer_load_dwordx4 v[4:7], s[0:3], 0 glc
-;CHECK: buffer_load_dwordx4 v[8:11], s[0:3], 0 slc
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+;CHECK: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
 ;CHECK: s_waitcnt
 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
 main_body:
@@ -18,7 +18,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:42
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
 main_body:
@@ -28,7 +28,7 @@
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
 ;CHECK: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1fff
-;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], [[OFFSET]] offset:1
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:1
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
index 63dcd84..555a1d2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_store:
-;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0
-;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc
-;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc
+;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0
+;CHECK: buffer_store_format_xyzw v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_store_format_xyzw v[8:11], off, s[0:3], 0 slc
 define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
@@ -14,7 +14,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_store_immoffs:
-;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0 offset:42
 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
index c935f9b..5ae255c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_store:
-;CHECK: buffer_store_dwordx4 v[0:3], s[0:3], 0
-;CHECK: buffer_store_dwordx4 v[4:7], s[0:3], 0 glc
-;CHECK: buffer_store_dwordx4 v[8:11], s[0:3], 0 slc
+;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
 define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
@@ -14,7 +14,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_store_immoffs:
-;CHECK: buffer_store_dwordx4 v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
index a8948374..17b4af8 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
@@ -231,8 +231,8 @@
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
-; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-; GCN: buffer_store_dwordx2 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
 define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
   %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3
@@ -334,8 +334,8 @@
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
-; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN: buffer_store_dwordx4 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
+; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
 define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
   %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
@@ -640,13 +640,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3i32_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
 
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
@@ -657,13 +657,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3i64_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
   %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
@@ -673,13 +673,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3f32_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
   %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
@@ -690,13 +690,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3f64_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
   %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll
index 4208c53..277b588 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll
@@ -8,7 +8,7 @@
 
 ; MUBUF load with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_load0:
-; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
+; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
@@ -19,7 +19,7 @@
 
 ; MUBUF load with the largest possible immediate offset
 ; CHECK-LABEL: {{^}}mubuf_load1:
-; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
+; CHECK: buffer_load_ubyte v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
@@ -31,7 +31,7 @@
 ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_load2:
 ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
-; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
+; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
@@ -92,7 +92,7 @@
 
 ; MUBUF store with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_store0:
-; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
+; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
 define void @mubuf_store0(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1
@@ -102,7 +102,7 @@
 
 ; MUBUF store with the largest possible immediate offset
 ; CHECK-LABEL: {{^}}mubuf_store1:
-; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
+; CHECK: buffer_store_byte v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
 
 define void @mubuf_store1(i8 addrspace(1)* %out) {
 entry:
@@ -114,7 +114,7 @@
 ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_store2:
 ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
-; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
+; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
 define void @mubuf_store2(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024
@@ -135,14 +135,14 @@
 }
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr:
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
+; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0
 define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
   store i32 99, i32 addrspace(1)* %out, align 4
   ret void
 }
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
+; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
 define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10
   store i32 99, i32 addrspace(1)* %out.gep, align 4
@@ -151,7 +151,7 @@
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
 ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
+; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
 define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
   store i32 99, i32 addrspace(1)* %out.gep, align 4
@@ -160,7 +160,7 @@
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic:
 ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
-; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
+; CHECK: buffer_atomic_add v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
 define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
   %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll b/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
index c255b05..c902cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
@@ -25,7 +25,7 @@
 }
 
 ; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1:
-; GCN: buffer_load_dword [[VAL:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
+; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
 ; GCN: buffer_store_dword [[VAL]]
 define void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
   %a = load i64, i64 addrspace(1)* %in, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll b/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll
index 41ff202..faf03c3 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll
@@ -7,8 +7,8 @@
 ; ordering the loads so that the lower address loads come first.
 
 ; FUNC-LABEL: {{^}}cluster_global_arg_loads:
-; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
+; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
 ; SI: buffer_store_dword [[REG0]]
 ; SI: buffer_store_dword [[REG1]]
 define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 34b85cd..14fe99c 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -184,12 +184,12 @@
 }
 
 ; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
 ; CI: buffer_store_dword
 ; CI: s_endpgm
 define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 1e7d37a..289d6f4 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -18,7 +18,7 @@
 ; CHECK: s_or_b64 exec, exec, s[2:3]
 ; CHECK-NEXT: s_mov_b32 s7, 0xf000
 ; CHECK-NEXT: s_mov_b32 s6, -1
-; CHECK-NEXT: buffer_store_dword v1, s[4:7], 0
+; CHECK-NEXT: buffer_store_dword v1, off, s[4:7], 0
 ; CHECK-NEXT: s_endpgm
 define void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index ff05027..4a209ca 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -2,9 +2,9 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}mac_vvv:
-; GCN: buffer_load_dword [[A:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
-; GCN: buffer_load_dword [[B:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:4
-; GCN: buffer_load_dword [[C:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; GCN: buffer_load_dword [[B:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4
+; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
 ; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]
 ; GCN: buffer_store_dword [[C]]
 define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index b303166..12afaa0 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -27,7 +27,7 @@
 ; VIMESA-NEXT: s_mov_b32 s15, 0x980000
 
 
-; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
 
 ; GCN: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
 ; GCN: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index b755b78..851a9f8 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -19,8 +19,8 @@
 ; VI-NEXT: s_mov_b32 s15, 0x980000
 
 ; s12 is offset user SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload
 
 ; GCN: NumVgprs: 256
 ; GCN: ScratchSize: 1024