[AMDGPU] Remove the AnnotateKernelFeatures pass (#130198)

Previously the AnnotateKernelFeatures pass infers two attributes:
amdgpu-calls and amdgpu-stack-objects, which are used to help determine
if flat scratch init is allowed. PR #118907 created the
amdgpu-no-flat-scratch-init attribute. Continuing with that work, this
patch makes use of this attribute to determine flat scratch init,
replacing amdgpu-calls and amdgpu-stack-objects. This also leads to the
removal of the AnnotateKernelFeatures pass.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
index 04d1794..4ac2cc9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
@@ -15,6 +15,9 @@
 ; GFX802-SDAG-LABEL: test_writelane_sreg_i32:
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
 ; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
@@ -53,6 +56,9 @@
 ; GFX802-GISEL-LABEL: test_writelane_sreg_i32:
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
 ; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
@@ -98,6 +104,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
@@ -147,6 +156,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
@@ -202,6 +214,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
@@ -251,6 +266,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
@@ -306,6 +324,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
@@ -348,6 +369,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -396,6 +420,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
@@ -444,6 +471,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -498,11 +528,14 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-SDAG-NEXT:    s_mov_b32 s5, 0x40400000
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s4
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
@@ -551,11 +584,14 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-GISEL-NEXT:    s_mov_b32 s5, 0x40400000
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s4
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
@@ -609,6 +645,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
@@ -668,6 +707,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -738,6 +780,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
@@ -803,6 +848,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -877,7 +925,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
-; GFX802-SDAG-NEXT:    s_mov_b32 s4, 0x40280000
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
@@ -886,6 +936,7 @@
 ; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX802-SDAG-NEXT:    flat_load_dword v2, v[0:1]
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX802-SDAG-NEXT:    s_mov_b32 s4, 0x40280000
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
@@ -946,7 +997,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
-; GFX802-GISEL-NEXT:    s_mov_b32 s4, 0x40280000
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -956,6 +1009,7 @@
 ; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX802-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX802-GISEL-NEXT:    s_mov_b32 s4, 0x40280000
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v4, s1
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s0
 ; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1028,15 +1082,18 @@
 ; GFX802-SDAG-NEXT:    ;;#ASMSTART
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, -1
 ; GFX802-SDAG-NEXT:    ;;#ASMEND
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-SDAG-NEXT:    s_mov_b32 s4, m0
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s2
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
-; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s4, m0
+; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
 ; GFX802-SDAG-NEXT:    s_endpgm
 ;
@@ -1081,15 +1138,18 @@
 ; GFX802-GISEL-NEXT:    ;;#ASMSTART
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, -1
 ; GFX802-GISEL-NEXT:    ;;#ASMEND
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-GISEL-NEXT:    s_mov_b32 s4, m0
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s2
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s4, m0
+; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX802-GISEL-NEXT:    s_endpgm
 ;
@@ -1138,6 +1198,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
@@ -1180,6 +1243,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -1227,6 +1293,9 @@
 ; GFX802-SDAG-LABEL: test_writelane_imm_i64:
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
@@ -1270,6 +1339,9 @@
 ; GFX802-GISEL-LABEL: test_writelane_imm_i64:
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1319,6 +1391,9 @@
 ; GFX802-SDAG-LABEL: test_writelane_imm_f64:
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
@@ -1362,6 +1437,9 @@
 ; GFX802-GISEL-LABEL: test_writelane_imm_f64:
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1412,6 +1490,9 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x8
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
@@ -1449,6 +1530,9 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x8
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
@@ -1492,10 +1576,13 @@
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x18
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX802-SDAG-NEXT:    v_writelane_b32 v3, s5, m0
@@ -1538,11 +1625,14 @@
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x18
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s4, m0
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s5, m0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s3
@@ -1589,10 +1679,13 @@
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x18
 ; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX802-SDAG-NEXT:    v_writelane_b32 v3, s5, m0
@@ -1635,11 +1728,14 @@
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x18
 ; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s4, m0
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s5, m0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s3
@@ -1684,7 +1780,10 @@
 ; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i32:
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, 42
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
@@ -1716,7 +1815,10 @@
 ; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i32:
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, 42
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -1754,11 +1856,14 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x10
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, 42
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s4
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
 ; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
@@ -1797,11 +1902,14 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x10
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, 42
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s4
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1845,11 +1953,14 @@
 ; GFX802-SDAG:       ; %bb.0:
 ; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x10
+; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40450000
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX802-SDAG-NEXT:    s_mov_b32 m0, s4
+; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
 ; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
@@ -1888,11 +1999,14 @@
 ; GFX802-GISEL:       ; %bb.0:
 ; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x10
 ; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, 0x40450000
 ; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX802-GISEL-NEXT:    s_mov_b32 m0, s4
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
 ; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
 ; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0