[AMDGPU] Fix using wrong register in frame index shift (#101649)
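In the v_mad case, the offset is materialized in a VGPR and the mad is
performed in wave space, so the resulting VGPR has to be shifted back
into lane space afterwards.
[#99556](https://github.com/llvm/llvm-project/pull/99556) introduced a
bug here: the shift used the SGPR operand of the mad (e.g. $sgpr32) as
its source instead of the VGPR holding the mad result.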
Co-authored-by: Pankajdwivedi-25 <pankajkumar.divedi@amd.com>
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
index 78fb25a..08c2904 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
@@ -708,7 +708,7 @@
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 64, implicit $exec
; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 64, $sgpr32, 0, implicit $exec
- ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -903,7 +903,7 @@
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 68, implicit $exec
; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 64, $sgpr32, 0, implicit $exec
- ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 9cd92dc..87cfaec 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -77,7 +77,7 @@
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32
-; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0
; GFX7-NEXT: v_writelane_b32 v23, s59, 28
; GFX7-NEXT: v_readfirstlane_b32 s59, v0
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
@@ -168,7 +168,7 @@
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32
-; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0
; GFX8-NEXT: v_writelane_b32 v23, s59, 28
; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
@@ -841,7 +841,7 @@
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_mad_u32_u24 v22, v22, 64, s32
-; GFX7-NEXT: v_lshr_b32_e64 v22, s32, 6
+; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22
; GFX7-NEXT: v_writelane_b32 v21, s59, 28
; GFX7-NEXT: v_readfirstlane_b32 s59, v22
; GFX7-NEXT: ;;#ASMSTART
@@ -924,7 +924,7 @@
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_mad_u32_u24 v22, v22, 64, s32
-; GFX8-NEXT: v_lshrrev_b32_e64 v22, 6, s32
+; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22
; GFX8-NEXT: v_writelane_b32 v21, s59, 28
; GFX8-NEXT: v_readfirstlane_b32 s59, v22
; GFX8-NEXT: ;;#ASMSTART