blob: 2420393b63ba9bccafd8ba67127c7fdb40b81cf0 [file] [log] [blame]
Matt Arsenaultef676642024-07-31 23:10:15 +04001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck -check-prefix=GFX8 %s
Matt Arsenault39337ff22024-12-02 09:23:17 -05004; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX900 %s
Fabian Rittera33a84e2025-02-13 15:17:12 +01005; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX942 %s
Matt Arsenaultef676642024-07-31 23:10:15 +04006; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10_1 %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s
8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
9; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
10
; Aggregate of 59 SGPR dwords, 23 VGPR dwords and one i64 for vcc. The shape
; matches the inline-asm "def" operand list in the checks below
; (s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc);
; presumably it is that asm's result type -- the IR use is not visible here.
11%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs: 16+16+16+8+2+1 = 59 dwords
12 <16 x i32>, <7 x i32>, ; vgprs: 16+7 = 23 dwords
13 i64 ; vcc: a single 64-bit value
14 }
15
; Same SGPR and vcc fields as %asm.output, but with two fewer VGPR dwords
; (<5 x i32> instead of <7 x i32> for the second vector group).
16%asm.output2 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs: 16+16+16+8+2+1 = 59 dwords
17 <16 x i32>, <5 x i32>, ; vgprs: 16+5 = 21 dwords
18 i64 ; vcc: a single 64-bit value
19 }
20
; Variant without the trailing scalar i32 in the SGPR group (one fewer SGPR
; dword than %asm.output / %asm.output2) and with <6 x i32> for the second
; VGPR group.
21%asm.output3 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, ; sgprs: 16+16+16+8+2 = 58 dwords
22 <16 x i32>, <6 x i32>, ; vgprs: 16+6 = 22 dwords
23 i64 ; vcc: a single 64-bit value
24 }
25
26; %alloca1 should end up materializing with s_mov_b32, but scc is
27; unavailable.
28;
29; This is primarily to test gfx7 and gfx8, which do not have vector
30; add with no carry.
31;
32define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 {
33; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
34; GFX7: ; %bb.0:
35; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
37; GFX7-NEXT: s_add_i32 s6, s32, 0x101100
38; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
39; GFX7-NEXT: s_mov_b64 exec, s[4:5]
40; GFX7-NEXT: v_writelane_b32 v23, s30, 0
41; GFX7-NEXT: v_writelane_b32 v23, s31, 1
42; GFX7-NEXT: v_writelane_b32 v23, s33, 2
43; GFX7-NEXT: v_writelane_b32 v23, s34, 3
44; GFX7-NEXT: v_writelane_b32 v23, s35, 4
45; GFX7-NEXT: v_writelane_b32 v23, s36, 5
46; GFX7-NEXT: v_writelane_b32 v23, s37, 6
47; GFX7-NEXT: v_writelane_b32 v23, s38, 7
48; GFX7-NEXT: v_writelane_b32 v23, s39, 8
49; GFX7-NEXT: v_writelane_b32 v23, s40, 9
50; GFX7-NEXT: v_writelane_b32 v23, s41, 10
51; GFX7-NEXT: v_writelane_b32 v23, s42, 11
52; GFX7-NEXT: v_writelane_b32 v23, s43, 12
53; GFX7-NEXT: v_writelane_b32 v23, s44, 13
54; GFX7-NEXT: v_writelane_b32 v23, s45, 14
55; GFX7-NEXT: v_writelane_b32 v23, s46, 15
56; GFX7-NEXT: v_writelane_b32 v23, s47, 16
57; GFX7-NEXT: v_writelane_b32 v23, s48, 17
58; GFX7-NEXT: v_writelane_b32 v23, s49, 18
59; GFX7-NEXT: v_writelane_b32 v23, s50, 19
60; GFX7-NEXT: v_writelane_b32 v23, s51, 20
61; GFX7-NEXT: v_writelane_b32 v23, s52, 21
62; GFX7-NEXT: v_writelane_b32 v23, s53, 22
63; GFX7-NEXT: v_writelane_b32 v23, s54, 23
64; GFX7-NEXT: v_writelane_b32 v23, s55, 24
65; GFX7-NEXT: v_writelane_b32 v23, s56, 25
66; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
67; GFX7-NEXT: v_writelane_b32 v23, s57, 26
68; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
69; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
70; GFX7-NEXT: v_writelane_b32 v23, s58, 27
71; GFX7-NEXT: ;;#ASMSTART
72; GFX7-NEXT: ; use alloca0 v0
73; GFX7-NEXT: ;;#ASMEND
74; GFX7-NEXT: ;;#ASMSTART
75; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
76; GFX7-NEXT: ;;#ASMEND
77; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32
78; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040
79; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32
Pankaj Dwivediadac04ff2024-08-02 17:17:32 +053080; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0
Matt Arsenaultef676642024-07-31 23:10:15 +040081; GFX7-NEXT: v_writelane_b32 v23, s59, 28
82; GFX7-NEXT: v_readfirstlane_b32 s59, v0
83; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
84; GFX7-NEXT: s_waitcnt vmcnt(0)
85; GFX7-NEXT: ;;#ASMSTART
86; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
87; GFX7-NEXT: ;;#ASMEND
88; GFX7-NEXT: v_readlane_b32 s59, v23, 28
89; GFX7-NEXT: v_readlane_b32 s58, v23, 27
90; GFX7-NEXT: v_readlane_b32 s57, v23, 26
91; GFX7-NEXT: v_readlane_b32 s56, v23, 25
92; GFX7-NEXT: v_readlane_b32 s55, v23, 24
93; GFX7-NEXT: v_readlane_b32 s54, v23, 23
94; GFX7-NEXT: v_readlane_b32 s53, v23, 22
95; GFX7-NEXT: v_readlane_b32 s52, v23, 21
96; GFX7-NEXT: v_readlane_b32 s51, v23, 20
97; GFX7-NEXT: v_readlane_b32 s50, v23, 19
98; GFX7-NEXT: v_readlane_b32 s49, v23, 18
99; GFX7-NEXT: v_readlane_b32 s48, v23, 17
100; GFX7-NEXT: v_readlane_b32 s47, v23, 16
101; GFX7-NEXT: v_readlane_b32 s46, v23, 15
102; GFX7-NEXT: v_readlane_b32 s45, v23, 14
103; GFX7-NEXT: v_readlane_b32 s44, v23, 13
104; GFX7-NEXT: v_readlane_b32 s43, v23, 12
105; GFX7-NEXT: v_readlane_b32 s42, v23, 11
106; GFX7-NEXT: v_readlane_b32 s41, v23, 10
107; GFX7-NEXT: v_readlane_b32 s40, v23, 9
108; GFX7-NEXT: v_readlane_b32 s39, v23, 8
109; GFX7-NEXT: v_readlane_b32 s38, v23, 7
110; GFX7-NEXT: v_readlane_b32 s37, v23, 6
111; GFX7-NEXT: v_readlane_b32 s36, v23, 5
112; GFX7-NEXT: v_readlane_b32 s35, v23, 4
113; GFX7-NEXT: v_readlane_b32 s34, v23, 3
114; GFX7-NEXT: v_readlane_b32 s33, v23, 2
115; GFX7-NEXT: v_readlane_b32 s31, v23, 1
116; GFX7-NEXT: v_readlane_b32 s30, v23, 0
117; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
118; GFX7-NEXT: s_add_i32 s6, s32, 0x101100
119; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
120; GFX7-NEXT: s_mov_b64 exec, s[4:5]
121; GFX7-NEXT: s_waitcnt vmcnt(0)
122; GFX7-NEXT: s_setpc_b64 s[30:31]
123;
124; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
125; GFX8: ; %bb.0:
126; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
128; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
129; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
130; GFX8-NEXT: s_mov_b64 exec, s[4:5]
131; GFX8-NEXT: v_writelane_b32 v23, s30, 0
132; GFX8-NEXT: v_writelane_b32 v23, s31, 1
133; GFX8-NEXT: v_writelane_b32 v23, s33, 2
134; GFX8-NEXT: v_writelane_b32 v23, s34, 3
135; GFX8-NEXT: v_writelane_b32 v23, s35, 4
136; GFX8-NEXT: v_writelane_b32 v23, s36, 5
137; GFX8-NEXT: v_writelane_b32 v23, s37, 6
138; GFX8-NEXT: v_writelane_b32 v23, s38, 7
139; GFX8-NEXT: v_writelane_b32 v23, s39, 8
140; GFX8-NEXT: v_writelane_b32 v23, s40, 9
141; GFX8-NEXT: v_writelane_b32 v23, s41, 10
142; GFX8-NEXT: v_writelane_b32 v23, s42, 11
143; GFX8-NEXT: v_writelane_b32 v23, s43, 12
144; GFX8-NEXT: v_writelane_b32 v23, s44, 13
145; GFX8-NEXT: v_writelane_b32 v23, s45, 14
146; GFX8-NEXT: v_writelane_b32 v23, s46, 15
147; GFX8-NEXT: v_writelane_b32 v23, s47, 16
148; GFX8-NEXT: v_writelane_b32 v23, s48, 17
149; GFX8-NEXT: v_writelane_b32 v23, s49, 18
150; GFX8-NEXT: v_writelane_b32 v23, s50, 19
151; GFX8-NEXT: v_writelane_b32 v23, s51, 20
152; GFX8-NEXT: v_writelane_b32 v23, s52, 21
153; GFX8-NEXT: v_writelane_b32 v23, s53, 22
154; GFX8-NEXT: v_writelane_b32 v23, s54, 23
155; GFX8-NEXT: v_writelane_b32 v23, s55, 24
156; GFX8-NEXT: v_writelane_b32 v23, s56, 25
157; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
158; GFX8-NEXT: v_writelane_b32 v23, s57, 26
159; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
160; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
161; GFX8-NEXT: v_writelane_b32 v23, s58, 27
162; GFX8-NEXT: ;;#ASMSTART
163; GFX8-NEXT: ; use alloca0 v0
164; GFX8-NEXT: ;;#ASMEND
165; GFX8-NEXT: ;;#ASMSTART
166; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
167; GFX8-NEXT: ;;#ASMEND
168; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32
169; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040
170; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32
Pankaj Dwivediadac04ff2024-08-02 17:17:32 +0530171; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0
Matt Arsenaultef676642024-07-31 23:10:15 +0400172; GFX8-NEXT: v_writelane_b32 v23, s59, 28
173; GFX8-NEXT: v_readfirstlane_b32 s59, v0
174; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
175; GFX8-NEXT: s_waitcnt vmcnt(0)
176; GFX8-NEXT: ;;#ASMSTART
177; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
178; GFX8-NEXT: ;;#ASMEND
179; GFX8-NEXT: v_readlane_b32 s59, v23, 28
180; GFX8-NEXT: v_readlane_b32 s58, v23, 27
181; GFX8-NEXT: v_readlane_b32 s57, v23, 26
182; GFX8-NEXT: v_readlane_b32 s56, v23, 25
183; GFX8-NEXT: v_readlane_b32 s55, v23, 24
184; GFX8-NEXT: v_readlane_b32 s54, v23, 23
185; GFX8-NEXT: v_readlane_b32 s53, v23, 22
186; GFX8-NEXT: v_readlane_b32 s52, v23, 21
187; GFX8-NEXT: v_readlane_b32 s51, v23, 20
188; GFX8-NEXT: v_readlane_b32 s50, v23, 19
189; GFX8-NEXT: v_readlane_b32 s49, v23, 18
190; GFX8-NEXT: v_readlane_b32 s48, v23, 17
191; GFX8-NEXT: v_readlane_b32 s47, v23, 16
192; GFX8-NEXT: v_readlane_b32 s46, v23, 15
193; GFX8-NEXT: v_readlane_b32 s45, v23, 14
194; GFX8-NEXT: v_readlane_b32 s44, v23, 13
195; GFX8-NEXT: v_readlane_b32 s43, v23, 12
196; GFX8-NEXT: v_readlane_b32 s42, v23, 11
197; GFX8-NEXT: v_readlane_b32 s41, v23, 10
198; GFX8-NEXT: v_readlane_b32 s40, v23, 9
199; GFX8-NEXT: v_readlane_b32 s39, v23, 8
200; GFX8-NEXT: v_readlane_b32 s38, v23, 7
201; GFX8-NEXT: v_readlane_b32 s37, v23, 6
202; GFX8-NEXT: v_readlane_b32 s36, v23, 5
203; GFX8-NEXT: v_readlane_b32 s35, v23, 4
204; GFX8-NEXT: v_readlane_b32 s34, v23, 3
205; GFX8-NEXT: v_readlane_b32 s33, v23, 2
206; GFX8-NEXT: v_readlane_b32 s31, v23, 1
207; GFX8-NEXT: v_readlane_b32 s30, v23, 0
208; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
209; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
210; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
211; GFX8-NEXT: s_mov_b64 exec, s[4:5]
212; GFX8-NEXT: s_waitcnt vmcnt(0)
213; GFX8-NEXT: s_setpc_b64 s[30:31]
214;
215; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
216; GFX900: ; %bb.0:
217; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
219; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
220; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
221; GFX900-NEXT: s_mov_b64 exec, s[4:5]
222; GFX900-NEXT: v_writelane_b32 v23, s30, 0
223; GFX900-NEXT: v_writelane_b32 v23, s31, 1
224; GFX900-NEXT: v_writelane_b32 v23, s33, 2
225; GFX900-NEXT: v_writelane_b32 v23, s34, 3
226; GFX900-NEXT: v_writelane_b32 v23, s35, 4
227; GFX900-NEXT: v_writelane_b32 v23, s36, 5
228; GFX900-NEXT: v_writelane_b32 v23, s37, 6
229; GFX900-NEXT: v_writelane_b32 v23, s38, 7
230; GFX900-NEXT: v_writelane_b32 v23, s39, 8
231; GFX900-NEXT: v_writelane_b32 v23, s40, 9
232; GFX900-NEXT: v_writelane_b32 v23, s41, 10
233; GFX900-NEXT: v_writelane_b32 v23, s42, 11
234; GFX900-NEXT: v_writelane_b32 v23, s43, 12
235; GFX900-NEXT: v_writelane_b32 v23, s44, 13
236; GFX900-NEXT: v_writelane_b32 v23, s45, 14
237; GFX900-NEXT: v_writelane_b32 v23, s46, 15
238; GFX900-NEXT: v_writelane_b32 v23, s47, 16
239; GFX900-NEXT: v_writelane_b32 v23, s48, 17
240; GFX900-NEXT: v_writelane_b32 v23, s49, 18
241; GFX900-NEXT: v_writelane_b32 v23, s50, 19
242; GFX900-NEXT: v_writelane_b32 v23, s51, 20
243; GFX900-NEXT: v_writelane_b32 v23, s52, 21
244; GFX900-NEXT: v_writelane_b32 v23, s53, 22
245; GFX900-NEXT: v_writelane_b32 v23, s54, 23
246; GFX900-NEXT: v_writelane_b32 v23, s55, 24
247; GFX900-NEXT: v_writelane_b32 v23, s56, 25
248; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
249; GFX900-NEXT: v_writelane_b32 v23, s57, 26
250; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
251; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
252; GFX900-NEXT: v_writelane_b32 v23, s58, 27
253; GFX900-NEXT: ;;#ASMSTART
254; GFX900-NEXT: ; use alloca0 v0
255; GFX900-NEXT: ;;#ASMEND
256; GFX900-NEXT: ;;#ASMSTART
257; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
258; GFX900-NEXT: ;;#ASMEND
259; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32
260; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
261; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
262; GFX900-NEXT: v_writelane_b32 v23, s59, 28
263; GFX900-NEXT: v_readfirstlane_b32 s59, v0
264; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32
265; GFX900-NEXT: s_waitcnt vmcnt(0)
266; GFX900-NEXT: ;;#ASMSTART
267; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
268; GFX900-NEXT: ;;#ASMEND
269; GFX900-NEXT: v_readlane_b32 s59, v23, 28
270; GFX900-NEXT: v_readlane_b32 s58, v23, 27
271; GFX900-NEXT: v_readlane_b32 s57, v23, 26
272; GFX900-NEXT: v_readlane_b32 s56, v23, 25
273; GFX900-NEXT: v_readlane_b32 s55, v23, 24
274; GFX900-NEXT: v_readlane_b32 s54, v23, 23
275; GFX900-NEXT: v_readlane_b32 s53, v23, 22
276; GFX900-NEXT: v_readlane_b32 s52, v23, 21
277; GFX900-NEXT: v_readlane_b32 s51, v23, 20
278; GFX900-NEXT: v_readlane_b32 s50, v23, 19
279; GFX900-NEXT: v_readlane_b32 s49, v23, 18
280; GFX900-NEXT: v_readlane_b32 s48, v23, 17
281; GFX900-NEXT: v_readlane_b32 s47, v23, 16
282; GFX900-NEXT: v_readlane_b32 s46, v23, 15
283; GFX900-NEXT: v_readlane_b32 s45, v23, 14
284; GFX900-NEXT: v_readlane_b32 s44, v23, 13
285; GFX900-NEXT: v_readlane_b32 s43, v23, 12
286; GFX900-NEXT: v_readlane_b32 s42, v23, 11
287; GFX900-NEXT: v_readlane_b32 s41, v23, 10
288; GFX900-NEXT: v_readlane_b32 s40, v23, 9
289; GFX900-NEXT: v_readlane_b32 s39, v23, 8
290; GFX900-NEXT: v_readlane_b32 s38, v23, 7
291; GFX900-NEXT: v_readlane_b32 s37, v23, 6
292; GFX900-NEXT: v_readlane_b32 s36, v23, 5
293; GFX900-NEXT: v_readlane_b32 s35, v23, 4
294; GFX900-NEXT: v_readlane_b32 s34, v23, 3
295; GFX900-NEXT: v_readlane_b32 s33, v23, 2
296; GFX900-NEXT: v_readlane_b32 s31, v23, 1
297; GFX900-NEXT: v_readlane_b32 s30, v23, 0
298; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
299; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
300; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
301; GFX900-NEXT: s_mov_b64 exec, s[4:5]
302; GFX900-NEXT: s_waitcnt vmcnt(0)
303; GFX900-NEXT: s_setpc_b64 s[30:31]
304;
Fabian Rittera33a84e2025-02-13 15:17:12 +0100305; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
306; GFX942: ; %bb.0:
307; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
309; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
310; GFX942-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill
311; GFX942-NEXT: s_mov_b64 exec, s[0:1]
312; GFX942-NEXT: v_writelane_b32 v23, s30, 0
313; GFX942-NEXT: v_writelane_b32 v23, s31, 1
314; GFX942-NEXT: v_writelane_b32 v23, s33, 2
315; GFX942-NEXT: v_writelane_b32 v23, s34, 3
316; GFX942-NEXT: v_writelane_b32 v23, s35, 4
317; GFX942-NEXT: v_writelane_b32 v23, s36, 5
318; GFX942-NEXT: v_writelane_b32 v23, s37, 6
319; GFX942-NEXT: v_writelane_b32 v23, s38, 7
320; GFX942-NEXT: v_writelane_b32 v23, s39, 8
321; GFX942-NEXT: v_writelane_b32 v23, s40, 9
322; GFX942-NEXT: v_writelane_b32 v23, s41, 10
323; GFX942-NEXT: v_writelane_b32 v23, s42, 11
324; GFX942-NEXT: v_writelane_b32 v23, s43, 12
325; GFX942-NEXT: v_writelane_b32 v23, s44, 13
326; GFX942-NEXT: v_writelane_b32 v23, s45, 14
327; GFX942-NEXT: v_writelane_b32 v23, s46, 15
328; GFX942-NEXT: v_writelane_b32 v23, s47, 16
329; GFX942-NEXT: v_writelane_b32 v23, s48, 17
330; GFX942-NEXT: v_writelane_b32 v23, s49, 18
331; GFX942-NEXT: v_writelane_b32 v23, s50, 19
332; GFX942-NEXT: v_writelane_b32 v23, s51, 20
333; GFX942-NEXT: v_writelane_b32 v23, s52, 21
334; GFX942-NEXT: v_writelane_b32 v23, s53, 22
335; GFX942-NEXT: v_writelane_b32 v23, s54, 23
336; GFX942-NEXT: v_writelane_b32 v23, s55, 24
337; GFX942-NEXT: v_writelane_b32 v23, s56, 25
338; GFX942-NEXT: v_writelane_b32 v23, s57, 26
339; GFX942-NEXT: v_writelane_b32 v23, s58, 27
340; GFX942-NEXT: v_writelane_b32 v23, s59, 28
341; GFX942-NEXT: v_writelane_b32 v23, s60, 29
342; GFX942-NEXT: s_add_i32 s0, s32, 64
343; GFX942-NEXT: v_writelane_b32 v23, s61, 30
344; GFX942-NEXT: v_mov_b32_e32 v0, s0
345; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
346; GFX942-NEXT: ;;#ASMSTART
347; GFX942-NEXT: ; use alloca0 v0
348; GFX942-NEXT: ;;#ASMEND
349; GFX942-NEXT: ;;#ASMSTART
350; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
351; GFX942-NEXT: ;;#ASMEND
352; GFX942-NEXT: s_addc_u32 s60, s32, 0x4040
353; GFX942-NEXT: s_bitcmp1_b32 s60, 0
354; GFX942-NEXT: s_bitset0_b32 s60, 0
355; GFX942-NEXT: s_mov_b32 s59, s60
356; GFX942-NEXT: ;;#ASMSTART
357; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
358; GFX942-NEXT: ;;#ASMEND
359; GFX942-NEXT: v_readlane_b32 s61, v23, 30
360; GFX942-NEXT: v_readlane_b32 s60, v23, 29
361; GFX942-NEXT: v_readlane_b32 s59, v23, 28
362; GFX942-NEXT: v_readlane_b32 s58, v23, 27
363; GFX942-NEXT: v_readlane_b32 s57, v23, 26
364; GFX942-NEXT: v_readlane_b32 s56, v23, 25
365; GFX942-NEXT: v_readlane_b32 s55, v23, 24
366; GFX942-NEXT: v_readlane_b32 s54, v23, 23
367; GFX942-NEXT: v_readlane_b32 s53, v23, 22
368; GFX942-NEXT: v_readlane_b32 s52, v23, 21
369; GFX942-NEXT: v_readlane_b32 s51, v23, 20
370; GFX942-NEXT: v_readlane_b32 s50, v23, 19
371; GFX942-NEXT: v_readlane_b32 s49, v23, 18
372; GFX942-NEXT: v_readlane_b32 s48, v23, 17
373; GFX942-NEXT: v_readlane_b32 s47, v23, 16
374; GFX942-NEXT: v_readlane_b32 s46, v23, 15
375; GFX942-NEXT: v_readlane_b32 s45, v23, 14
376; GFX942-NEXT: v_readlane_b32 s44, v23, 13
377; GFX942-NEXT: v_readlane_b32 s43, v23, 12
378; GFX942-NEXT: v_readlane_b32 s42, v23, 11
379; GFX942-NEXT: v_readlane_b32 s41, v23, 10
380; GFX942-NEXT: v_readlane_b32 s40, v23, 9
381; GFX942-NEXT: v_readlane_b32 s39, v23, 8
382; GFX942-NEXT: v_readlane_b32 s38, v23, 7
383; GFX942-NEXT: v_readlane_b32 s37, v23, 6
384; GFX942-NEXT: v_readlane_b32 s36, v23, 5
385; GFX942-NEXT: v_readlane_b32 s35, v23, 4
386; GFX942-NEXT: v_readlane_b32 s34, v23, 3
387; GFX942-NEXT: v_readlane_b32 s33, v23, 2
388; GFX942-NEXT: v_readlane_b32 s31, v23, 1
389; GFX942-NEXT: v_readlane_b32 s30, v23, 0
390; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
391; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
392; GFX942-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload
393; GFX942-NEXT: s_mov_b64 exec, s[0:1]
394; GFX942-NEXT: s_waitcnt vmcnt(0)
395; GFX942-NEXT: s_setpc_b64 s[30:31]
Matt Arsenaultef676642024-07-31 23:10:15 +0400396;
397; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
398; GFX10_1: ; %bb.0:
399; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
401; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
402; GFX10_1-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
403; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
404; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
405; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0
406; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
407; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
408; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1
409; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
410; GFX10_1-NEXT: ;;#ASMSTART
411; GFX10_1-NEXT: ; use alloca0 v0
412; GFX10_1-NEXT: ;;#ASMEND
413; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2
414; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3
415; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4
416; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5
417; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6
418; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7
419; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8
420; GFX10_1-NEXT: v_writelane_b32 v23, s40, 9
421; GFX10_1-NEXT: v_writelane_b32 v23, s41, 10
422; GFX10_1-NEXT: v_writelane_b32 v23, s42, 11
423; GFX10_1-NEXT: v_writelane_b32 v23, s43, 12
424; GFX10_1-NEXT: v_writelane_b32 v23, s44, 13
425; GFX10_1-NEXT: v_writelane_b32 v23, s45, 14
426; GFX10_1-NEXT: v_writelane_b32 v23, s46, 15
427; GFX10_1-NEXT: v_writelane_b32 v23, s47, 16
428; GFX10_1-NEXT: v_writelane_b32 v23, s48, 17
429; GFX10_1-NEXT: v_writelane_b32 v23, s49, 18
430; GFX10_1-NEXT: v_writelane_b32 v23, s50, 19
431; GFX10_1-NEXT: v_writelane_b32 v23, s51, 20
432; GFX10_1-NEXT: v_writelane_b32 v23, s52, 21
433; GFX10_1-NEXT: v_writelane_b32 v23, s53, 22
434; GFX10_1-NEXT: v_writelane_b32 v23, s54, 23
435; GFX10_1-NEXT: v_writelane_b32 v23, s55, 24
436; GFX10_1-NEXT: v_writelane_b32 v23, s56, 25
437; GFX10_1-NEXT: v_writelane_b32 v23, s57, 26
438; GFX10_1-NEXT: v_writelane_b32 v23, s58, 27
439; GFX10_1-NEXT: ;;#ASMSTART
440; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
441; GFX10_1-NEXT: ;;#ASMEND
442; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32
443; GFX10_1-NEXT: v_writelane_b32 v23, s59, 28
444; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
445; GFX10_1-NEXT: v_readfirstlane_b32 s59, v24
446; GFX10_1-NEXT: ;;#ASMSTART
447; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
448; GFX10_1-NEXT: ;;#ASMEND
449; GFX10_1-NEXT: v_readlane_b32 s59, v23, 28
450; GFX10_1-NEXT: v_readlane_b32 s58, v23, 27
451; GFX10_1-NEXT: v_readlane_b32 s57, v23, 26
452; GFX10_1-NEXT: v_readlane_b32 s56, v23, 25
453; GFX10_1-NEXT: v_readlane_b32 s55, v23, 24
454; GFX10_1-NEXT: v_readlane_b32 s54, v23, 23
455; GFX10_1-NEXT: v_readlane_b32 s53, v23, 22
456; GFX10_1-NEXT: v_readlane_b32 s52, v23, 21
457; GFX10_1-NEXT: v_readlane_b32 s51, v23, 20
458; GFX10_1-NEXT: v_readlane_b32 s50, v23, 19
459; GFX10_1-NEXT: v_readlane_b32 s49, v23, 18
460; GFX10_1-NEXT: v_readlane_b32 s48, v23, 17
461; GFX10_1-NEXT: v_readlane_b32 s47, v23, 16
462; GFX10_1-NEXT: v_readlane_b32 s46, v23, 15
463; GFX10_1-NEXT: v_readlane_b32 s45, v23, 14
464; GFX10_1-NEXT: v_readlane_b32 s44, v23, 13
465; GFX10_1-NEXT: v_readlane_b32 s43, v23, 12
466; GFX10_1-NEXT: v_readlane_b32 s42, v23, 11
467; GFX10_1-NEXT: v_readlane_b32 s41, v23, 10
468; GFX10_1-NEXT: v_readlane_b32 s40, v23, 9
469; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8
470; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7
471; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6
472; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5
473; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4
474; GFX10_1-NEXT: v_readlane_b32 s34, v23, 3
475; GFX10_1-NEXT: v_readlane_b32 s33, v23, 2
476; GFX10_1-NEXT: v_readlane_b32 s31, v23, 1
477; GFX10_1-NEXT: v_readlane_b32 s30, v23, 0
478; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
479; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
480; GFX10_1-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
481; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
482; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
483; GFX10_1-NEXT: s_waitcnt vmcnt(0)
484; GFX10_1-NEXT: s_setpc_b64 s[30:31]
485;
486; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
487; GFX10_3: ; %bb.0:
488; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
490; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
491; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
492; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
493; GFX10_3-NEXT: v_writelane_b32 v23, s30, 0
494; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
495; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
496; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1
497; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
498; GFX10_3-NEXT: ;;#ASMSTART
499; GFX10_3-NEXT: ; use alloca0 v0
500; GFX10_3-NEXT: ;;#ASMEND
501; GFX10_3-NEXT: v_writelane_b32 v23, s33, 2
502; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3
503; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4
504; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5
505; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6
506; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7
507; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8
508; GFX10_3-NEXT: v_writelane_b32 v23, s40, 9
509; GFX10_3-NEXT: v_writelane_b32 v23, s41, 10
510; GFX10_3-NEXT: v_writelane_b32 v23, s42, 11
511; GFX10_3-NEXT: v_writelane_b32 v23, s43, 12
512; GFX10_3-NEXT: v_writelane_b32 v23, s44, 13
513; GFX10_3-NEXT: v_writelane_b32 v23, s45, 14
514; GFX10_3-NEXT: v_writelane_b32 v23, s46, 15
515; GFX10_3-NEXT: v_writelane_b32 v23, s47, 16
516; GFX10_3-NEXT: v_writelane_b32 v23, s48, 17
517; GFX10_3-NEXT: v_writelane_b32 v23, s49, 18
518; GFX10_3-NEXT: v_writelane_b32 v23, s50, 19
519; GFX10_3-NEXT: v_writelane_b32 v23, s51, 20
520; GFX10_3-NEXT: v_writelane_b32 v23, s52, 21
521; GFX10_3-NEXT: v_writelane_b32 v23, s53, 22
522; GFX10_3-NEXT: v_writelane_b32 v23, s54, 23
523; GFX10_3-NEXT: v_writelane_b32 v23, s55, 24
524; GFX10_3-NEXT: v_writelane_b32 v23, s56, 25
525; GFX10_3-NEXT: v_writelane_b32 v23, s57, 26
526; GFX10_3-NEXT: v_writelane_b32 v23, s58, 27
527; GFX10_3-NEXT: ;;#ASMSTART
528; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
529; GFX10_3-NEXT: ;;#ASMEND
530; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32
531; GFX10_3-NEXT: v_writelane_b32 v23, s59, 28
532; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
533; GFX10_3-NEXT: v_readfirstlane_b32 s59, v24
534; GFX10_3-NEXT: ;;#ASMSTART
535; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
536; GFX10_3-NEXT: ;;#ASMEND
537; GFX10_3-NEXT: v_readlane_b32 s59, v23, 28
538; GFX10_3-NEXT: v_readlane_b32 s58, v23, 27
539; GFX10_3-NEXT: v_readlane_b32 s57, v23, 26
540; GFX10_3-NEXT: v_readlane_b32 s56, v23, 25
541; GFX10_3-NEXT: v_readlane_b32 s55, v23, 24
542; GFX10_3-NEXT: v_readlane_b32 s54, v23, 23
543; GFX10_3-NEXT: v_readlane_b32 s53, v23, 22
544; GFX10_3-NEXT: v_readlane_b32 s52, v23, 21
545; GFX10_3-NEXT: v_readlane_b32 s51, v23, 20
546; GFX10_3-NEXT: v_readlane_b32 s50, v23, 19
547; GFX10_3-NEXT: v_readlane_b32 s49, v23, 18
548; GFX10_3-NEXT: v_readlane_b32 s48, v23, 17
549; GFX10_3-NEXT: v_readlane_b32 s47, v23, 16
550; GFX10_3-NEXT: v_readlane_b32 s46, v23, 15
551; GFX10_3-NEXT: v_readlane_b32 s45, v23, 14
552; GFX10_3-NEXT: v_readlane_b32 s44, v23, 13
553; GFX10_3-NEXT: v_readlane_b32 s43, v23, 12
554; GFX10_3-NEXT: v_readlane_b32 s42, v23, 11
555; GFX10_3-NEXT: v_readlane_b32 s41, v23, 10
556; GFX10_3-NEXT: v_readlane_b32 s40, v23, 9
557; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8
558; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7
559; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6
560; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5
561; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4
562; GFX10_3-NEXT: v_readlane_b32 s34, v23, 3
563; GFX10_3-NEXT: v_readlane_b32 s33, v23, 2
564; GFX10_3-NEXT: v_readlane_b32 s31, v23, 1
565; GFX10_3-NEXT: v_readlane_b32 s30, v23, 0
566; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
567; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
568; GFX10_3-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
569; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
570; GFX10_3-NEXT: s_waitcnt vmcnt(0)
571; GFX10_3-NEXT: s_setpc_b64 s[30:31]
572;
573; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
574; GFX11: ; %bb.0:
575; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
577; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
578; GFX11-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill
579; GFX11-NEXT: s_mov_b32 exec_lo, s0
580; GFX11-NEXT: v_writelane_b32 v23, s30, 0
581; GFX11-NEXT: s_add_i32 s0, s32, 64
582; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
583; GFX11-NEXT: v_mov_b32_e32 v0, s0
584; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
585; GFX11-NEXT: v_writelane_b32 v23, s31, 1
586; GFX11-NEXT: ;;#ASMSTART
587; GFX11-NEXT: ; use alloca0 v0
588; GFX11-NEXT: ;;#ASMEND
589; GFX11-NEXT: v_writelane_b32 v23, s33, 2
590; GFX11-NEXT: v_writelane_b32 v23, s34, 3
591; GFX11-NEXT: v_writelane_b32 v23, s35, 4
592; GFX11-NEXT: v_writelane_b32 v23, s36, 5
593; GFX11-NEXT: v_writelane_b32 v23, s37, 6
594; GFX11-NEXT: v_writelane_b32 v23, s38, 7
595; GFX11-NEXT: v_writelane_b32 v23, s39, 8
596; GFX11-NEXT: v_writelane_b32 v23, s40, 9
597; GFX11-NEXT: v_writelane_b32 v23, s41, 10
598; GFX11-NEXT: v_writelane_b32 v23, s42, 11
599; GFX11-NEXT: v_writelane_b32 v23, s43, 12
600; GFX11-NEXT: v_writelane_b32 v23, s44, 13
601; GFX11-NEXT: v_writelane_b32 v23, s45, 14
602; GFX11-NEXT: v_writelane_b32 v23, s46, 15
603; GFX11-NEXT: v_writelane_b32 v23, s47, 16
604; GFX11-NEXT: v_writelane_b32 v23, s48, 17
605; GFX11-NEXT: v_writelane_b32 v23, s49, 18
606; GFX11-NEXT: v_writelane_b32 v23, s50, 19
607; GFX11-NEXT: v_writelane_b32 v23, s51, 20
608; GFX11-NEXT: v_writelane_b32 v23, s52, 21
609; GFX11-NEXT: v_writelane_b32 v23, s53, 22
610; GFX11-NEXT: v_writelane_b32 v23, s54, 23
611; GFX11-NEXT: v_writelane_b32 v23, s55, 24
612; GFX11-NEXT: v_writelane_b32 v23, s56, 25
613; GFX11-NEXT: v_writelane_b32 v23, s57, 26
614; GFX11-NEXT: v_writelane_b32 v23, s58, 27
615; GFX11-NEXT: ;;#ASMSTART
616; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
617; GFX11-NEXT: ;;#ASMEND
618; GFX11-NEXT: s_addc_u32 s32, s32, 0x4040
619; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
620; GFX11-NEXT: s_bitcmp1_b32 s32, 0
621; GFX11-NEXT: v_writelane_b32 v23, s59, 28
622; GFX11-NEXT: s_bitset0_b32 s32, 0
623; GFX11-NEXT: s_mov_b32 s59, s32
624; GFX11-NEXT: s_addc_u32 s32, s32, 0xffffbfc0
625; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
626; GFX11-NEXT: s_bitcmp1_b32 s32, 0
627; GFX11-NEXT: s_bitset0_b32 s32, 0
628; GFX11-NEXT: ;;#ASMSTART
629; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
630; GFX11-NEXT: ;;#ASMEND
631; GFX11-NEXT: v_readlane_b32 s59, v23, 28
632; GFX11-NEXT: v_readlane_b32 s58, v23, 27
633; GFX11-NEXT: v_readlane_b32 s57, v23, 26
634; GFX11-NEXT: v_readlane_b32 s56, v23, 25
635; GFX11-NEXT: v_readlane_b32 s55, v23, 24
636; GFX11-NEXT: v_readlane_b32 s54, v23, 23
637; GFX11-NEXT: v_readlane_b32 s53, v23, 22
638; GFX11-NEXT: v_readlane_b32 s52, v23, 21
639; GFX11-NEXT: v_readlane_b32 s51, v23, 20
640; GFX11-NEXT: v_readlane_b32 s50, v23, 19
641; GFX11-NEXT: v_readlane_b32 s49, v23, 18
642; GFX11-NEXT: v_readlane_b32 s48, v23, 17
643; GFX11-NEXT: v_readlane_b32 s47, v23, 16
644; GFX11-NEXT: v_readlane_b32 s46, v23, 15
645; GFX11-NEXT: v_readlane_b32 s45, v23, 14
646; GFX11-NEXT: v_readlane_b32 s44, v23, 13
647; GFX11-NEXT: v_readlane_b32 s43, v23, 12
648; GFX11-NEXT: v_readlane_b32 s42, v23, 11
649; GFX11-NEXT: v_readlane_b32 s41, v23, 10
650; GFX11-NEXT: v_readlane_b32 s40, v23, 9
651; GFX11-NEXT: v_readlane_b32 s39, v23, 8
652; GFX11-NEXT: v_readlane_b32 s38, v23, 7
653; GFX11-NEXT: v_readlane_b32 s37, v23, 6
654; GFX11-NEXT: v_readlane_b32 s36, v23, 5
655; GFX11-NEXT: v_readlane_b32 s35, v23, 4
656; GFX11-NEXT: v_readlane_b32 s34, v23, 3
657; GFX11-NEXT: v_readlane_b32 s33, v23, 2
658; GFX11-NEXT: v_readlane_b32 s31, v23, 1
659; GFX11-NEXT: v_readlane_b32 s30, v23, 0
660; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
661; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
662; GFX11-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload
663; GFX11-NEXT: s_mov_b32 exec_lo, s0
664; GFX11-NEXT: s_waitcnt vmcnt(0)
665; GFX11-NEXT: s_setpc_b64 s[30:31]
666;
667; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
668; GFX12: ; %bb.0:
669; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
670; GFX12-NEXT: s_wait_expcnt 0x0
671; GFX12-NEXT: s_wait_samplecnt 0x0
672; GFX12-NEXT: s_wait_bvhcnt 0x0
673; GFX12-NEXT: s_wait_kmcnt 0x0
674; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
675; GFX12-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill
Carl Ritson86627142024-09-04 12:15:20 +0900676; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +0400677; GFX12-NEXT: s_mov_b32 exec_lo, s0
678; GFX12-NEXT: v_writelane_b32 v23, s30, 0
679; GFX12-NEXT: v_mov_b32_e32 v0, s32
680; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
681; GFX12-NEXT: ;;#ASMSTART
682; GFX12-NEXT: ; use alloca0 v0
683; GFX12-NEXT: ;;#ASMEND
684; GFX12-NEXT: v_writelane_b32 v23, s31, 1
685; GFX12-NEXT: v_writelane_b32 v23, s33, 2
686; GFX12-NEXT: v_writelane_b32 v23, s34, 3
687; GFX12-NEXT: v_writelane_b32 v23, s35, 4
688; GFX12-NEXT: v_writelane_b32 v23, s36, 5
689; GFX12-NEXT: v_writelane_b32 v23, s37, 6
690; GFX12-NEXT: v_writelane_b32 v23, s38, 7
691; GFX12-NEXT: v_writelane_b32 v23, s39, 8
692; GFX12-NEXT: v_writelane_b32 v23, s40, 9
693; GFX12-NEXT: v_writelane_b32 v23, s41, 10
694; GFX12-NEXT: v_writelane_b32 v23, s42, 11
695; GFX12-NEXT: v_writelane_b32 v23, s43, 12
696; GFX12-NEXT: v_writelane_b32 v23, s44, 13
697; GFX12-NEXT: v_writelane_b32 v23, s45, 14
698; GFX12-NEXT: v_writelane_b32 v23, s46, 15
699; GFX12-NEXT: v_writelane_b32 v23, s47, 16
700; GFX12-NEXT: v_writelane_b32 v23, s48, 17
701; GFX12-NEXT: v_writelane_b32 v23, s49, 18
702; GFX12-NEXT: v_writelane_b32 v23, s50, 19
703; GFX12-NEXT: v_writelane_b32 v23, s51, 20
704; GFX12-NEXT: v_writelane_b32 v23, s52, 21
705; GFX12-NEXT: v_writelane_b32 v23, s53, 22
706; GFX12-NEXT: v_writelane_b32 v23, s54, 23
707; GFX12-NEXT: v_writelane_b32 v23, s55, 24
708; GFX12-NEXT: v_writelane_b32 v23, s56, 25
709; GFX12-NEXT: v_writelane_b32 v23, s57, 26
710; GFX12-NEXT: v_writelane_b32 v23, s58, 27
711; GFX12-NEXT: ;;#ASMSTART
712; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
713; GFX12-NEXT: ;;#ASMEND
714; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0x4000
Carl Ritson86627142024-09-04 12:15:20 +0900715; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +0400716; GFX12-NEXT: s_bitcmp1_b32 s32, 0
717; GFX12-NEXT: v_writelane_b32 v23, s59, 28
718; GFX12-NEXT: s_bitset0_b32 s32, 0
Carl Ritson86627142024-09-04 12:15:20 +0900719; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +0400720; GFX12-NEXT: s_mov_b32 s59, s32
721; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0xffffc000
Carl Ritson86627142024-09-04 12:15:20 +0900722; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +0400723; GFX12-NEXT: s_bitcmp1_b32 s32, 0
724; GFX12-NEXT: s_bitset0_b32 s32, 0
725; GFX12-NEXT: ;;#ASMSTART
726; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
727; GFX12-NEXT: ;;#ASMEND
728; GFX12-NEXT: v_readlane_b32 s59, v23, 28
729; GFX12-NEXT: v_readlane_b32 s58, v23, 27
730; GFX12-NEXT: v_readlane_b32 s57, v23, 26
731; GFX12-NEXT: v_readlane_b32 s56, v23, 25
732; GFX12-NEXT: v_readlane_b32 s55, v23, 24
733; GFX12-NEXT: v_readlane_b32 s54, v23, 23
734; GFX12-NEXT: v_readlane_b32 s53, v23, 22
735; GFX12-NEXT: v_readlane_b32 s52, v23, 21
736; GFX12-NEXT: v_readlane_b32 s51, v23, 20
737; GFX12-NEXT: v_readlane_b32 s50, v23, 19
738; GFX12-NEXT: v_readlane_b32 s49, v23, 18
739; GFX12-NEXT: v_readlane_b32 s48, v23, 17
740; GFX12-NEXT: v_readlane_b32 s47, v23, 16
741; GFX12-NEXT: v_readlane_b32 s46, v23, 15
742; GFX12-NEXT: v_readlane_b32 s45, v23, 14
743; GFX12-NEXT: v_readlane_b32 s44, v23, 13
744; GFX12-NEXT: v_readlane_b32 s43, v23, 12
745; GFX12-NEXT: v_readlane_b32 s42, v23, 11
746; GFX12-NEXT: v_readlane_b32 s41, v23, 10
747; GFX12-NEXT: v_readlane_b32 s40, v23, 9
748; GFX12-NEXT: v_readlane_b32 s39, v23, 8
749; GFX12-NEXT: v_readlane_b32 s38, v23, 7
750; GFX12-NEXT: v_readlane_b32 s37, v23, 6
751; GFX12-NEXT: v_readlane_b32 s36, v23, 5
752; GFX12-NEXT: v_readlane_b32 s35, v23, 4
753; GFX12-NEXT: v_readlane_b32 s34, v23, 3
754; GFX12-NEXT: v_readlane_b32 s33, v23, 2
755; GFX12-NEXT: v_readlane_b32 s31, v23, 1
756; GFX12-NEXT: v_readlane_b32 s30, v23, 0
757; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
758; GFX12-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload
Carl Ritson86627142024-09-04 12:15:20 +0900759; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +0400760; GFX12-NEXT: s_mov_b32 exec_lo, s0
761; GFX12-NEXT: s_wait_loadcnt 0x0
762; GFX12-NEXT: s_setpc_b64 s[30:31]
763 %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
764 %alloca1 = alloca i32, align 4, addrspace(5)
765 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
766
767 ; Force no SGPRs to be available for the carry-out of the vector add.
768 %asm = call %asm.output asm sideeffect
769 "; def $0, $1, $2, $3, $4, $5, $6, $7, $8",
770 "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:22]},={vcc}"()
771
772 %s0 = extractvalue %asm.output %asm, 0
773 %s1 = extractvalue %asm.output %asm, 1
774 %s2 = extractvalue %asm.output %asm, 2
775 %s3 = extractvalue %asm.output %asm, 3
776 %s4 = extractvalue %asm.output %asm, 4
777 %s5 = extractvalue %asm.output %asm, 5
778
779 %v0 = extractvalue %asm.output %asm, 6
780 %v1 = extractvalue %asm.output %asm, 7
781
782 %vcc = extractvalue %asm.output %asm, 8
783
784 ; scc is unavailable since it is live in
785 call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
786 "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"(
787 <16 x i32> %s0,
788 <16 x i32> %s1,
789 <16 x i32> %s2,
790 <8 x i32> %s3,
791 <2 x i32> %s4,
792 i32 %s5,
793 <16 x i32> %v0,
794 <7 x i32> %v1,
795 i64 %vcc,
796 ptr addrspace(5) %alloca1,
797 i32 0) ; use of scc
798
799 ret void
800}
801
802; FIXME: Ideally this would test the FI at offset 0, but other objects get
803; assigned there. Instead, this shows a non-0 offset that is still an inline
804; immediate and can fold directly into the address computation.
805define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset() #1 {
806; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
807; GFX7: ; %bb.0:
808; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
809; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
810; GFX7-NEXT: s_add_i32 s6, s32, 0x100400
811; GFX7-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
812; GFX7-NEXT: s_mov_b64 exec, s[4:5]
813; GFX7-NEXT: v_writelane_b32 v21, s30, 0
814; GFX7-NEXT: v_writelane_b32 v21, s31, 1
815; GFX7-NEXT: v_writelane_b32 v21, s33, 2
816; GFX7-NEXT: v_writelane_b32 v21, s34, 3
817; GFX7-NEXT: v_writelane_b32 v21, s35, 4
818; GFX7-NEXT: v_writelane_b32 v21, s36, 5
819; GFX7-NEXT: v_writelane_b32 v21, s37, 6
820; GFX7-NEXT: v_writelane_b32 v21, s38, 7
821; GFX7-NEXT: v_writelane_b32 v21, s39, 8
822; GFX7-NEXT: v_writelane_b32 v21, s40, 9
823; GFX7-NEXT: v_writelane_b32 v21, s41, 10
824; GFX7-NEXT: v_writelane_b32 v21, s42, 11
825; GFX7-NEXT: v_writelane_b32 v21, s43, 12
826; GFX7-NEXT: v_writelane_b32 v21, s44, 13
827; GFX7-NEXT: v_writelane_b32 v21, s45, 14
828; GFX7-NEXT: v_writelane_b32 v21, s46, 15
829; GFX7-NEXT: v_writelane_b32 v21, s47, 16
830; GFX7-NEXT: v_writelane_b32 v21, s48, 17
831; GFX7-NEXT: v_writelane_b32 v21, s49, 18
832; GFX7-NEXT: v_writelane_b32 v21, s50, 19
833; GFX7-NEXT: v_writelane_b32 v21, s51, 20
834; GFX7-NEXT: v_writelane_b32 v21, s52, 21
835; GFX7-NEXT: v_writelane_b32 v21, s53, 22
836; GFX7-NEXT: v_writelane_b32 v21, s54, 23
837; GFX7-NEXT: v_writelane_b32 v21, s55, 24
838; GFX7-NEXT: v_writelane_b32 v21, s56, 25
839; GFX7-NEXT: v_writelane_b32 v21, s57, 26
840; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
Matt Arsenaultef676642024-07-31 23:10:15 +0400841; GFX7-NEXT: v_writelane_b32 v21, s58, 27
842; GFX7-NEXT: ;;#ASMSTART
843; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
844; GFX7-NEXT: ;;#ASMEND
Pankaj Dwivedi42bae9c2024-08-05 12:49:33 +0530845; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32
Pankaj Dwivediadac04ff2024-08-02 17:17:32 +0530846; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22
Matt Arsenaultef676642024-07-31 23:10:15 +0400847; GFX7-NEXT: v_writelane_b32 v21, s59, 28
848; GFX7-NEXT: v_readfirstlane_b32 s59, v22
849; GFX7-NEXT: ;;#ASMSTART
850; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
851; GFX7-NEXT: ;;#ASMEND
852; GFX7-NEXT: v_readlane_b32 s59, v21, 28
853; GFX7-NEXT: v_readlane_b32 s58, v21, 27
854; GFX7-NEXT: v_readlane_b32 s57, v21, 26
855; GFX7-NEXT: v_readlane_b32 s56, v21, 25
856; GFX7-NEXT: v_readlane_b32 s55, v21, 24
857; GFX7-NEXT: v_readlane_b32 s54, v21, 23
858; GFX7-NEXT: v_readlane_b32 s53, v21, 22
859; GFX7-NEXT: v_readlane_b32 s52, v21, 21
860; GFX7-NEXT: v_readlane_b32 s51, v21, 20
861; GFX7-NEXT: v_readlane_b32 s50, v21, 19
862; GFX7-NEXT: v_readlane_b32 s49, v21, 18
863; GFX7-NEXT: v_readlane_b32 s48, v21, 17
864; GFX7-NEXT: v_readlane_b32 s47, v21, 16
865; GFX7-NEXT: v_readlane_b32 s46, v21, 15
866; GFX7-NEXT: v_readlane_b32 s45, v21, 14
867; GFX7-NEXT: v_readlane_b32 s44, v21, 13
868; GFX7-NEXT: v_readlane_b32 s43, v21, 12
869; GFX7-NEXT: v_readlane_b32 s42, v21, 11
870; GFX7-NEXT: v_readlane_b32 s41, v21, 10
871; GFX7-NEXT: v_readlane_b32 s40, v21, 9
872; GFX7-NEXT: v_readlane_b32 s39, v21, 8
873; GFX7-NEXT: v_readlane_b32 s38, v21, 7
874; GFX7-NEXT: v_readlane_b32 s37, v21, 6
875; GFX7-NEXT: v_readlane_b32 s36, v21, 5
876; GFX7-NEXT: v_readlane_b32 s35, v21, 4
877; GFX7-NEXT: v_readlane_b32 s34, v21, 3
878; GFX7-NEXT: v_readlane_b32 s33, v21, 2
879; GFX7-NEXT: v_readlane_b32 s31, v21, 1
880; GFX7-NEXT: v_readlane_b32 s30, v21, 0
881; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
882; GFX7-NEXT: s_add_i32 s6, s32, 0x100400
883; GFX7-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
884; GFX7-NEXT: s_mov_b64 exec, s[4:5]
885; GFX7-NEXT: s_waitcnt vmcnt(0)
886; GFX7-NEXT: s_setpc_b64 s[30:31]
887;
888; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
889; GFX8: ; %bb.0:
890; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
892; GFX8-NEXT: s_add_i32 s6, s32, 0x100400
893; GFX8-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
894; GFX8-NEXT: s_mov_b64 exec, s[4:5]
895; GFX8-NEXT: v_writelane_b32 v21, s30, 0
896; GFX8-NEXT: v_writelane_b32 v21, s31, 1
897; GFX8-NEXT: v_writelane_b32 v21, s33, 2
898; GFX8-NEXT: v_writelane_b32 v21, s34, 3
899; GFX8-NEXT: v_writelane_b32 v21, s35, 4
900; GFX8-NEXT: v_writelane_b32 v21, s36, 5
901; GFX8-NEXT: v_writelane_b32 v21, s37, 6
902; GFX8-NEXT: v_writelane_b32 v21, s38, 7
903; GFX8-NEXT: v_writelane_b32 v21, s39, 8
904; GFX8-NEXT: v_writelane_b32 v21, s40, 9
905; GFX8-NEXT: v_writelane_b32 v21, s41, 10
906; GFX8-NEXT: v_writelane_b32 v21, s42, 11
907; GFX8-NEXT: v_writelane_b32 v21, s43, 12
908; GFX8-NEXT: v_writelane_b32 v21, s44, 13
909; GFX8-NEXT: v_writelane_b32 v21, s45, 14
910; GFX8-NEXT: v_writelane_b32 v21, s46, 15
911; GFX8-NEXT: v_writelane_b32 v21, s47, 16
912; GFX8-NEXT: v_writelane_b32 v21, s48, 17
913; GFX8-NEXT: v_writelane_b32 v21, s49, 18
914; GFX8-NEXT: v_writelane_b32 v21, s50, 19
915; GFX8-NEXT: v_writelane_b32 v21, s51, 20
916; GFX8-NEXT: v_writelane_b32 v21, s52, 21
917; GFX8-NEXT: v_writelane_b32 v21, s53, 22
918; GFX8-NEXT: v_writelane_b32 v21, s54, 23
919; GFX8-NEXT: v_writelane_b32 v21, s55, 24
920; GFX8-NEXT: v_writelane_b32 v21, s56, 25
921; GFX8-NEXT: v_writelane_b32 v21, s57, 26
922; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
Matt Arsenaultef676642024-07-31 23:10:15 +0400923; GFX8-NEXT: v_writelane_b32 v21, s58, 27
924; GFX8-NEXT: ;;#ASMSTART
925; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
926; GFX8-NEXT: ;;#ASMEND
Pankaj Dwivedi42bae9c2024-08-05 12:49:33 +0530927; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32
Pankaj Dwivediadac04ff2024-08-02 17:17:32 +0530928; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22
Matt Arsenaultef676642024-07-31 23:10:15 +0400929; GFX8-NEXT: v_writelane_b32 v21, s59, 28
930; GFX8-NEXT: v_readfirstlane_b32 s59, v22
931; GFX8-NEXT: ;;#ASMSTART
932; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
933; GFX8-NEXT: ;;#ASMEND
934; GFX8-NEXT: v_readlane_b32 s59, v21, 28
935; GFX8-NEXT: v_readlane_b32 s58, v21, 27
936; GFX8-NEXT: v_readlane_b32 s57, v21, 26
937; GFX8-NEXT: v_readlane_b32 s56, v21, 25
938; GFX8-NEXT: v_readlane_b32 s55, v21, 24
939; GFX8-NEXT: v_readlane_b32 s54, v21, 23
940; GFX8-NEXT: v_readlane_b32 s53, v21, 22
941; GFX8-NEXT: v_readlane_b32 s52, v21, 21
942; GFX8-NEXT: v_readlane_b32 s51, v21, 20
943; GFX8-NEXT: v_readlane_b32 s50, v21, 19
944; GFX8-NEXT: v_readlane_b32 s49, v21, 18
945; GFX8-NEXT: v_readlane_b32 s48, v21, 17
946; GFX8-NEXT: v_readlane_b32 s47, v21, 16
947; GFX8-NEXT: v_readlane_b32 s46, v21, 15
948; GFX8-NEXT: v_readlane_b32 s45, v21, 14
949; GFX8-NEXT: v_readlane_b32 s44, v21, 13
950; GFX8-NEXT: v_readlane_b32 s43, v21, 12
951; GFX8-NEXT: v_readlane_b32 s42, v21, 11
952; GFX8-NEXT: v_readlane_b32 s41, v21, 10
953; GFX8-NEXT: v_readlane_b32 s40, v21, 9
954; GFX8-NEXT: v_readlane_b32 s39, v21, 8
955; GFX8-NEXT: v_readlane_b32 s38, v21, 7
956; GFX8-NEXT: v_readlane_b32 s37, v21, 6
957; GFX8-NEXT: v_readlane_b32 s36, v21, 5
958; GFX8-NEXT: v_readlane_b32 s35, v21, 4
959; GFX8-NEXT: v_readlane_b32 s34, v21, 3
960; GFX8-NEXT: v_readlane_b32 s33, v21, 2
961; GFX8-NEXT: v_readlane_b32 s31, v21, 1
962; GFX8-NEXT: v_readlane_b32 s30, v21, 0
963; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
964; GFX8-NEXT: s_add_i32 s6, s32, 0x100400
965; GFX8-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
966; GFX8-NEXT: s_mov_b64 exec, s[4:5]
967; GFX8-NEXT: s_waitcnt vmcnt(0)
968; GFX8-NEXT: s_setpc_b64 s[30:31]
969;
970; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
971; GFX900: ; %bb.0:
972; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
973; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
974; GFX900-NEXT: s_add_i32 s6, s32, 0x100400
975; GFX900-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
976; GFX900-NEXT: s_mov_b64 exec, s[4:5]
977; GFX900-NEXT: v_writelane_b32 v21, s30, 0
978; GFX900-NEXT: v_writelane_b32 v21, s31, 1
979; GFX900-NEXT: v_writelane_b32 v21, s33, 2
980; GFX900-NEXT: v_writelane_b32 v21, s34, 3
981; GFX900-NEXT: v_writelane_b32 v21, s35, 4
982; GFX900-NEXT: v_writelane_b32 v21, s36, 5
983; GFX900-NEXT: v_writelane_b32 v21, s37, 6
984; GFX900-NEXT: v_writelane_b32 v21, s38, 7
985; GFX900-NEXT: v_writelane_b32 v21, s39, 8
986; GFX900-NEXT: v_writelane_b32 v21, s40, 9
987; GFX900-NEXT: v_writelane_b32 v21, s41, 10
988; GFX900-NEXT: v_writelane_b32 v21, s42, 11
989; GFX900-NEXT: v_writelane_b32 v21, s43, 12
990; GFX900-NEXT: v_writelane_b32 v21, s44, 13
991; GFX900-NEXT: v_writelane_b32 v21, s45, 14
992; GFX900-NEXT: v_writelane_b32 v21, s46, 15
993; GFX900-NEXT: v_writelane_b32 v21, s47, 16
994; GFX900-NEXT: v_writelane_b32 v21, s48, 17
995; GFX900-NEXT: v_writelane_b32 v21, s49, 18
996; GFX900-NEXT: v_writelane_b32 v21, s50, 19
997; GFX900-NEXT: v_writelane_b32 v21, s51, 20
998; GFX900-NEXT: v_writelane_b32 v21, s52, 21
999; GFX900-NEXT: v_writelane_b32 v21, s53, 22
1000; GFX900-NEXT: v_writelane_b32 v21, s54, 23
1001; GFX900-NEXT: v_writelane_b32 v21, s55, 24
1002; GFX900-NEXT: v_writelane_b32 v21, s56, 25
1003; GFX900-NEXT: v_writelane_b32 v21, s57, 26
1004; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
1005; GFX900-NEXT: v_writelane_b32 v21, s58, 27
1006; GFX900-NEXT: ;;#ASMSTART
1007; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
1008; GFX900-NEXT: ;;#ASMEND
1009; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32
1010; GFX900-NEXT: v_add_u32_e32 v22, 16, v22
1011; GFX900-NEXT: v_writelane_b32 v21, s59, 28
1012; GFX900-NEXT: v_readfirstlane_b32 s59, v22
1013; GFX900-NEXT: ;;#ASMSTART
1014; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
1015; GFX900-NEXT: ;;#ASMEND
1016; GFX900-NEXT: v_readlane_b32 s59, v21, 28
1017; GFX900-NEXT: v_readlane_b32 s58, v21, 27
1018; GFX900-NEXT: v_readlane_b32 s57, v21, 26
1019; GFX900-NEXT: v_readlane_b32 s56, v21, 25
1020; GFX900-NEXT: v_readlane_b32 s55, v21, 24
1021; GFX900-NEXT: v_readlane_b32 s54, v21, 23
1022; GFX900-NEXT: v_readlane_b32 s53, v21, 22
1023; GFX900-NEXT: v_readlane_b32 s52, v21, 21
1024; GFX900-NEXT: v_readlane_b32 s51, v21, 20
1025; GFX900-NEXT: v_readlane_b32 s50, v21, 19
1026; GFX900-NEXT: v_readlane_b32 s49, v21, 18
1027; GFX900-NEXT: v_readlane_b32 s48, v21, 17
1028; GFX900-NEXT: v_readlane_b32 s47, v21, 16
1029; GFX900-NEXT: v_readlane_b32 s46, v21, 15
1030; GFX900-NEXT: v_readlane_b32 s45, v21, 14
1031; GFX900-NEXT: v_readlane_b32 s44, v21, 13
1032; GFX900-NEXT: v_readlane_b32 s43, v21, 12
1033; GFX900-NEXT: v_readlane_b32 s42, v21, 11
1034; GFX900-NEXT: v_readlane_b32 s41, v21, 10
1035; GFX900-NEXT: v_readlane_b32 s40, v21, 9
1036; GFX900-NEXT: v_readlane_b32 s39, v21, 8
1037; GFX900-NEXT: v_readlane_b32 s38, v21, 7
1038; GFX900-NEXT: v_readlane_b32 s37, v21, 6
1039; GFX900-NEXT: v_readlane_b32 s36, v21, 5
1040; GFX900-NEXT: v_readlane_b32 s35, v21, 4
1041; GFX900-NEXT: v_readlane_b32 s34, v21, 3
1042; GFX900-NEXT: v_readlane_b32 s33, v21, 2
1043; GFX900-NEXT: v_readlane_b32 s31, v21, 1
1044; GFX900-NEXT: v_readlane_b32 s30, v21, 0
1045; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
1046; GFX900-NEXT: s_add_i32 s6, s32, 0x100400
1047; GFX900-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
1048; GFX900-NEXT: s_mov_b64 exec, s[4:5]
1049; GFX900-NEXT: s_waitcnt vmcnt(0)
1050; GFX900-NEXT: s_setpc_b64 s[30:31]
1051;
Fabian Rittera33a84e2025-02-13 15:17:12 +01001052; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
1053; GFX942: ; %bb.0:
1054; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
1056; GFX942-NEXT: s_add_i32 s2, s32, 0x4010
1057; GFX942-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill
1058; GFX942-NEXT: s_mov_b64 exec, s[0:1]
1059; GFX942-NEXT: v_writelane_b32 v21, s30, 0
1060; GFX942-NEXT: v_writelane_b32 v21, s31, 1
1061; GFX942-NEXT: v_writelane_b32 v21, s33, 2
1062; GFX942-NEXT: v_writelane_b32 v21, s34, 3
1063; GFX942-NEXT: v_writelane_b32 v21, s35, 4
1064; GFX942-NEXT: v_writelane_b32 v21, s36, 5
1065; GFX942-NEXT: v_writelane_b32 v21, s37, 6
1066; GFX942-NEXT: v_writelane_b32 v21, s38, 7
1067; GFX942-NEXT: v_writelane_b32 v21, s39, 8
1068; GFX942-NEXT: v_writelane_b32 v21, s40, 9
1069; GFX942-NEXT: v_writelane_b32 v21, s41, 10
1070; GFX942-NEXT: v_writelane_b32 v21, s42, 11
1071; GFX942-NEXT: v_writelane_b32 v21, s43, 12
1072; GFX942-NEXT: v_writelane_b32 v21, s44, 13
1073; GFX942-NEXT: v_writelane_b32 v21, s45, 14
1074; GFX942-NEXT: v_writelane_b32 v21, s46, 15
1075; GFX942-NEXT: v_writelane_b32 v21, s47, 16
1076; GFX942-NEXT: v_writelane_b32 v21, s48, 17
1077; GFX942-NEXT: v_writelane_b32 v21, s49, 18
1078; GFX942-NEXT: v_writelane_b32 v21, s50, 19
1079; GFX942-NEXT: v_writelane_b32 v21, s51, 20
1080; GFX942-NEXT: v_writelane_b32 v21, s52, 21
1081; GFX942-NEXT: v_writelane_b32 v21, s53, 22
1082; GFX942-NEXT: v_writelane_b32 v21, s54, 23
1083; GFX942-NEXT: v_writelane_b32 v21, s55, 24
1084; GFX942-NEXT: v_writelane_b32 v21, s56, 25
1085; GFX942-NEXT: v_writelane_b32 v21, s57, 26
1086; GFX942-NEXT: v_writelane_b32 v21, s58, 27
1087; GFX942-NEXT: v_writelane_b32 v21, s59, 28
1088; GFX942-NEXT: v_writelane_b32 v21, s60, 29
1089; GFX942-NEXT: v_writelane_b32 v21, s61, 30
1090; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
1091; GFX942-NEXT: ;;#ASMSTART
1092; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
1093; GFX942-NEXT: ;;#ASMEND
1094; GFX942-NEXT: s_addc_u32 s60, s32, 16
1095; GFX942-NEXT: s_bitcmp1_b32 s60, 0
1096; GFX942-NEXT: s_bitset0_b32 s60, 0
1097; GFX942-NEXT: s_mov_b32 s59, s60
1098; GFX942-NEXT: ;;#ASMSTART
1099; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
1100; GFX942-NEXT: ;;#ASMEND
1101; GFX942-NEXT: v_readlane_b32 s61, v21, 30
1102; GFX942-NEXT: v_readlane_b32 s60, v21, 29
1103; GFX942-NEXT: v_readlane_b32 s59, v21, 28
1104; GFX942-NEXT: v_readlane_b32 s58, v21, 27
1105; GFX942-NEXT: v_readlane_b32 s57, v21, 26
1106; GFX942-NEXT: v_readlane_b32 s56, v21, 25
1107; GFX942-NEXT: v_readlane_b32 s55, v21, 24
1108; GFX942-NEXT: v_readlane_b32 s54, v21, 23
1109; GFX942-NEXT: v_readlane_b32 s53, v21, 22
1110; GFX942-NEXT: v_readlane_b32 s52, v21, 21
1111; GFX942-NEXT: v_readlane_b32 s51, v21, 20
1112; GFX942-NEXT: v_readlane_b32 s50, v21, 19
1113; GFX942-NEXT: v_readlane_b32 s49, v21, 18
1114; GFX942-NEXT: v_readlane_b32 s48, v21, 17
1115; GFX942-NEXT: v_readlane_b32 s47, v21, 16
1116; GFX942-NEXT: v_readlane_b32 s46, v21, 15
1117; GFX942-NEXT: v_readlane_b32 s45, v21, 14
1118; GFX942-NEXT: v_readlane_b32 s44, v21, 13
1119; GFX942-NEXT: v_readlane_b32 s43, v21, 12
1120; GFX942-NEXT: v_readlane_b32 s42, v21, 11
1121; GFX942-NEXT: v_readlane_b32 s41, v21, 10
1122; GFX942-NEXT: v_readlane_b32 s40, v21, 9
1123; GFX942-NEXT: v_readlane_b32 s39, v21, 8
1124; GFX942-NEXT: v_readlane_b32 s38, v21, 7
1125; GFX942-NEXT: v_readlane_b32 s37, v21, 6
1126; GFX942-NEXT: v_readlane_b32 s36, v21, 5
1127; GFX942-NEXT: v_readlane_b32 s35, v21, 4
1128; GFX942-NEXT: v_readlane_b32 s34, v21, 3
1129; GFX942-NEXT: v_readlane_b32 s33, v21, 2
1130; GFX942-NEXT: v_readlane_b32 s31, v21, 1
1131; GFX942-NEXT: v_readlane_b32 s30, v21, 0
1132; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
1133; GFX942-NEXT: s_add_i32 s2, s32, 0x4010
1134; GFX942-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload
1135; GFX942-NEXT: s_mov_b64 exec, s[0:1]
1136; GFX942-NEXT: s_waitcnt vmcnt(0)
1137; GFX942-NEXT: s_setpc_b64 s[30:31]
Matt Arsenaultef676642024-07-31 23:10:15 +04001138;
1139; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
1140; GFX10_1: ; %bb.0:
1141; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
1143; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200
1144; GFX10_1-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
1145; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
1146; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
1147; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0
1148; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1
1149; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2
1150; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3
1151; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4
1152; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5
1153; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6
1154; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7
1155; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8
1156; GFX10_1-NEXT: v_writelane_b32 v21, s40, 9
1157; GFX10_1-NEXT: v_writelane_b32 v21, s41, 10
1158; GFX10_1-NEXT: v_writelane_b32 v21, s42, 11
1159; GFX10_1-NEXT: v_writelane_b32 v21, s43, 12
1160; GFX10_1-NEXT: v_writelane_b32 v21, s44, 13
1161; GFX10_1-NEXT: v_writelane_b32 v21, s45, 14
1162; GFX10_1-NEXT: v_writelane_b32 v21, s46, 15
1163; GFX10_1-NEXT: v_writelane_b32 v21, s47, 16
1164; GFX10_1-NEXT: v_writelane_b32 v21, s48, 17
1165; GFX10_1-NEXT: v_writelane_b32 v21, s49, 18
1166; GFX10_1-NEXT: v_writelane_b32 v21, s50, 19
1167; GFX10_1-NEXT: v_writelane_b32 v21, s51, 20
1168; GFX10_1-NEXT: v_writelane_b32 v21, s52, 21
1169; GFX10_1-NEXT: v_writelane_b32 v21, s53, 22
1170; GFX10_1-NEXT: v_writelane_b32 v21, s54, 23
1171; GFX10_1-NEXT: v_writelane_b32 v21, s55, 24
1172; GFX10_1-NEXT: v_writelane_b32 v21, s56, 25
1173; GFX10_1-NEXT: v_writelane_b32 v21, s57, 26
1174; GFX10_1-NEXT: v_writelane_b32 v21, s58, 27
1175; GFX10_1-NEXT: ;;#ASMSTART
1176; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
1177; GFX10_1-NEXT: ;;#ASMEND
1178; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32
1179; GFX10_1-NEXT: v_writelane_b32 v21, s59, 28
1180; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo
1181; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22
1182; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22
1183; GFX10_1-NEXT: ;;#ASMSTART
1184; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
1185; GFX10_1-NEXT: ;;#ASMEND
1186; GFX10_1-NEXT: v_readlane_b32 s59, v21, 28
1187; GFX10_1-NEXT: v_readlane_b32 s58, v21, 27
1188; GFX10_1-NEXT: v_readlane_b32 s57, v21, 26
1189; GFX10_1-NEXT: v_readlane_b32 s56, v21, 25
1190; GFX10_1-NEXT: v_readlane_b32 s55, v21, 24
1191; GFX10_1-NEXT: v_readlane_b32 s54, v21, 23
1192; GFX10_1-NEXT: v_readlane_b32 s53, v21, 22
1193; GFX10_1-NEXT: v_readlane_b32 s52, v21, 21
1194; GFX10_1-NEXT: v_readlane_b32 s51, v21, 20
1195; GFX10_1-NEXT: v_readlane_b32 s50, v21, 19
1196; GFX10_1-NEXT: v_readlane_b32 s49, v21, 18
1197; GFX10_1-NEXT: v_readlane_b32 s48, v21, 17
1198; GFX10_1-NEXT: v_readlane_b32 s47, v21, 16
1199; GFX10_1-NEXT: v_readlane_b32 s46, v21, 15
1200; GFX10_1-NEXT: v_readlane_b32 s45, v21, 14
1201; GFX10_1-NEXT: v_readlane_b32 s44, v21, 13
1202; GFX10_1-NEXT: v_readlane_b32 s43, v21, 12
1203; GFX10_1-NEXT: v_readlane_b32 s42, v21, 11
1204; GFX10_1-NEXT: v_readlane_b32 s41, v21, 10
1205; GFX10_1-NEXT: v_readlane_b32 s40, v21, 9
1206; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8
1207; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7
1208; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6
1209; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5
1210; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4
1211; GFX10_1-NEXT: v_readlane_b32 s34, v21, 3
1212; GFX10_1-NEXT: v_readlane_b32 s33, v21, 2
1213; GFX10_1-NEXT: v_readlane_b32 s31, v21, 1
1214; GFX10_1-NEXT: v_readlane_b32 s30, v21, 0
1215; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
1216; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200
1217; GFX10_1-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
1218; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
1219; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
1220; GFX10_1-NEXT: s_waitcnt vmcnt(0)
1221; GFX10_1-NEXT: s_setpc_b64 s[30:31]
1222;
1223; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
1224; GFX10_3: ; %bb.0:
1225; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1226; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
1227; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200
1228; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
1229; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
1230; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0
1231; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1
1232; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2
1233; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3
1234; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4
1235; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5
1236; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6
1237; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7
1238; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8
1239; GFX10_3-NEXT: v_writelane_b32 v21, s40, 9
1240; GFX10_3-NEXT: v_writelane_b32 v21, s41, 10
1241; GFX10_3-NEXT: v_writelane_b32 v21, s42, 11
1242; GFX10_3-NEXT: v_writelane_b32 v21, s43, 12
1243; GFX10_3-NEXT: v_writelane_b32 v21, s44, 13
1244; GFX10_3-NEXT: v_writelane_b32 v21, s45, 14
1245; GFX10_3-NEXT: v_writelane_b32 v21, s46, 15
1246; GFX10_3-NEXT: v_writelane_b32 v21, s47, 16
1247; GFX10_3-NEXT: v_writelane_b32 v21, s48, 17
1248; GFX10_3-NEXT: v_writelane_b32 v21, s49, 18
1249; GFX10_3-NEXT: v_writelane_b32 v21, s50, 19
1250; GFX10_3-NEXT: v_writelane_b32 v21, s51, 20
1251; GFX10_3-NEXT: v_writelane_b32 v21, s52, 21
1252; GFX10_3-NEXT: v_writelane_b32 v21, s53, 22
1253; GFX10_3-NEXT: v_writelane_b32 v21, s54, 23
1254; GFX10_3-NEXT: v_writelane_b32 v21, s55, 24
1255; GFX10_3-NEXT: v_writelane_b32 v21, s56, 25
1256; GFX10_3-NEXT: v_writelane_b32 v21, s57, 26
1257; GFX10_3-NEXT: v_writelane_b32 v21, s58, 27
1258; GFX10_3-NEXT: ;;#ASMSTART
1259; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
1260; GFX10_3-NEXT: ;;#ASMEND
1261; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32
1262; GFX10_3-NEXT: v_writelane_b32 v21, s59, 28
1263; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo
1264; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22
1265; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22
1266; GFX10_3-NEXT: ;;#ASMSTART
1267; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
1268; GFX10_3-NEXT: ;;#ASMEND
1269; GFX10_3-NEXT: v_readlane_b32 s59, v21, 28
1270; GFX10_3-NEXT: v_readlane_b32 s58, v21, 27
1271; GFX10_3-NEXT: v_readlane_b32 s57, v21, 26
1272; GFX10_3-NEXT: v_readlane_b32 s56, v21, 25
1273; GFX10_3-NEXT: v_readlane_b32 s55, v21, 24
1274; GFX10_3-NEXT: v_readlane_b32 s54, v21, 23
1275; GFX10_3-NEXT: v_readlane_b32 s53, v21, 22
1276; GFX10_3-NEXT: v_readlane_b32 s52, v21, 21
1277; GFX10_3-NEXT: v_readlane_b32 s51, v21, 20
1278; GFX10_3-NEXT: v_readlane_b32 s50, v21, 19
1279; GFX10_3-NEXT: v_readlane_b32 s49, v21, 18
1280; GFX10_3-NEXT: v_readlane_b32 s48, v21, 17
1281; GFX10_3-NEXT: v_readlane_b32 s47, v21, 16
1282; GFX10_3-NEXT: v_readlane_b32 s46, v21, 15
1283; GFX10_3-NEXT: v_readlane_b32 s45, v21, 14
1284; GFX10_3-NEXT: v_readlane_b32 s44, v21, 13
1285; GFX10_3-NEXT: v_readlane_b32 s43, v21, 12
1286; GFX10_3-NEXT: v_readlane_b32 s42, v21, 11
1287; GFX10_3-NEXT: v_readlane_b32 s41, v21, 10
1288; GFX10_3-NEXT: v_readlane_b32 s40, v21, 9
1289; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8
1290; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7
1291; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6
1292; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5
1293; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4
1294; GFX10_3-NEXT: v_readlane_b32 s34, v21, 3
1295; GFX10_3-NEXT: v_readlane_b32 s33, v21, 2
1296; GFX10_3-NEXT: v_readlane_b32 s31, v21, 1
1297; GFX10_3-NEXT: v_readlane_b32 s30, v21, 0
1298; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
1299; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200
1300; GFX10_3-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
1301; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
1302; GFX10_3-NEXT: s_waitcnt vmcnt(0)
1303; GFX10_3-NEXT: s_setpc_b64 s[30:31]
1304;
1305; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
1306; GFX11: ; %bb.0:
1307; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
1309; GFX11-NEXT: s_add_i32 s1, s32, 0x4010
1310; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill
1311; GFX11-NEXT: s_mov_b32 exec_lo, s0
1312; GFX11-NEXT: v_writelane_b32 v21, s30, 0
1313; GFX11-NEXT: v_writelane_b32 v21, s31, 1
1314; GFX11-NEXT: v_writelane_b32 v21, s33, 2
1315; GFX11-NEXT: v_writelane_b32 v21, s34, 3
1316; GFX11-NEXT: v_writelane_b32 v21, s35, 4
1317; GFX11-NEXT: v_writelane_b32 v21, s36, 5
1318; GFX11-NEXT: v_writelane_b32 v21, s37, 6
1319; GFX11-NEXT: v_writelane_b32 v21, s38, 7
1320; GFX11-NEXT: v_writelane_b32 v21, s39, 8
1321; GFX11-NEXT: v_writelane_b32 v21, s40, 9
1322; GFX11-NEXT: v_writelane_b32 v21, s41, 10
1323; GFX11-NEXT: v_writelane_b32 v21, s42, 11
1324; GFX11-NEXT: v_writelane_b32 v21, s43, 12
1325; GFX11-NEXT: v_writelane_b32 v21, s44, 13
1326; GFX11-NEXT: v_writelane_b32 v21, s45, 14
1327; GFX11-NEXT: v_writelane_b32 v21, s46, 15
1328; GFX11-NEXT: v_writelane_b32 v21, s47, 16
1329; GFX11-NEXT: v_writelane_b32 v21, s48, 17
1330; GFX11-NEXT: v_writelane_b32 v21, s49, 18
1331; GFX11-NEXT: v_writelane_b32 v21, s50, 19
1332; GFX11-NEXT: v_writelane_b32 v21, s51, 20
1333; GFX11-NEXT: v_writelane_b32 v21, s52, 21
1334; GFX11-NEXT: v_writelane_b32 v21, s53, 22
1335; GFX11-NEXT: v_writelane_b32 v21, s54, 23
1336; GFX11-NEXT: v_writelane_b32 v21, s55, 24
1337; GFX11-NEXT: v_writelane_b32 v21, s56, 25
1338; GFX11-NEXT: v_writelane_b32 v21, s57, 26
1339; GFX11-NEXT: v_writelane_b32 v21, s58, 27
1340; GFX11-NEXT: ;;#ASMSTART
1341; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
1342; GFX11-NEXT: ;;#ASMEND
1343; GFX11-NEXT: v_writelane_b32 v21, s59, 28
1344; GFX11-NEXT: s_and_b32 s59, 0, exec_lo
1345; GFX11-NEXT: s_addc_u32 s32, s32, 16
1346; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1347; GFX11-NEXT: s_bitcmp1_b32 s32, 0
1348; GFX11-NEXT: s_bitset0_b32 s32, 0
1349; GFX11-NEXT: s_mov_b32 s59, s32
1350; GFX11-NEXT: s_addc_u32 s32, s32, -16
1351; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1352; GFX11-NEXT: s_bitcmp1_b32 s32, 0
1353; GFX11-NEXT: s_bitset0_b32 s32, 0
1354; GFX11-NEXT: ;;#ASMSTART
1355; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
1356; GFX11-NEXT: ;;#ASMEND
1357; GFX11-NEXT: v_readlane_b32 s59, v21, 28
1358; GFX11-NEXT: v_readlane_b32 s58, v21, 27
1359; GFX11-NEXT: v_readlane_b32 s57, v21, 26
1360; GFX11-NEXT: v_readlane_b32 s56, v21, 25
1361; GFX11-NEXT: v_readlane_b32 s55, v21, 24
1362; GFX11-NEXT: v_readlane_b32 s54, v21, 23
1363; GFX11-NEXT: v_readlane_b32 s53, v21, 22
1364; GFX11-NEXT: v_readlane_b32 s52, v21, 21
1365; GFX11-NEXT: v_readlane_b32 s51, v21, 20
1366; GFX11-NEXT: v_readlane_b32 s50, v21, 19
1367; GFX11-NEXT: v_readlane_b32 s49, v21, 18
1368; GFX11-NEXT: v_readlane_b32 s48, v21, 17
1369; GFX11-NEXT: v_readlane_b32 s47, v21, 16
1370; GFX11-NEXT: v_readlane_b32 s46, v21, 15
1371; GFX11-NEXT: v_readlane_b32 s45, v21, 14
1372; GFX11-NEXT: v_readlane_b32 s44, v21, 13
1373; GFX11-NEXT: v_readlane_b32 s43, v21, 12
1374; GFX11-NEXT: v_readlane_b32 s42, v21, 11
1375; GFX11-NEXT: v_readlane_b32 s41, v21, 10
1376; GFX11-NEXT: v_readlane_b32 s40, v21, 9
1377; GFX11-NEXT: v_readlane_b32 s39, v21, 8
1378; GFX11-NEXT: v_readlane_b32 s38, v21, 7
1379; GFX11-NEXT: v_readlane_b32 s37, v21, 6
1380; GFX11-NEXT: v_readlane_b32 s36, v21, 5
1381; GFX11-NEXT: v_readlane_b32 s35, v21, 4
1382; GFX11-NEXT: v_readlane_b32 s34, v21, 3
1383; GFX11-NEXT: v_readlane_b32 s33, v21, 2
1384; GFX11-NEXT: v_readlane_b32 s31, v21, 1
1385; GFX11-NEXT: v_readlane_b32 s30, v21, 0
1386; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
1387; GFX11-NEXT: s_add_i32 s1, s32, 0x4010
1388; GFX11-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload
1389; GFX11-NEXT: s_mov_b32 exec_lo, s0
1390; GFX11-NEXT: s_waitcnt vmcnt(0)
1391; GFX11-NEXT: s_setpc_b64 s[30:31]
1392;
1393; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
1394; GFX12: ; %bb.0:
1395; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1396; GFX12-NEXT: s_wait_expcnt 0x0
1397; GFX12-NEXT: s_wait_samplecnt 0x0
1398; GFX12-NEXT: s_wait_bvhcnt 0x0
1399; GFX12-NEXT: s_wait_kmcnt 0x0
1400; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
1401; GFX12-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill
Carl Ritson86627142024-09-04 12:15:20 +09001402; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +04001403; GFX12-NEXT: s_mov_b32 exec_lo, s0
1404; GFX12-NEXT: v_writelane_b32 v21, s30, 0
1405; GFX12-NEXT: v_writelane_b32 v21, s31, 1
1406; GFX12-NEXT: v_writelane_b32 v21, s33, 2
1407; GFX12-NEXT: v_writelane_b32 v21, s34, 3
1408; GFX12-NEXT: v_writelane_b32 v21, s35, 4
1409; GFX12-NEXT: v_writelane_b32 v21, s36, 5
1410; GFX12-NEXT: v_writelane_b32 v21, s37, 6
1411; GFX12-NEXT: v_writelane_b32 v21, s38, 7
1412; GFX12-NEXT: v_writelane_b32 v21, s39, 8
1413; GFX12-NEXT: v_writelane_b32 v21, s40, 9
1414; GFX12-NEXT: v_writelane_b32 v21, s41, 10
1415; GFX12-NEXT: v_writelane_b32 v21, s42, 11
1416; GFX12-NEXT: v_writelane_b32 v21, s43, 12
1417; GFX12-NEXT: v_writelane_b32 v21, s44, 13
1418; GFX12-NEXT: v_writelane_b32 v21, s45, 14
1419; GFX12-NEXT: v_writelane_b32 v21, s46, 15
1420; GFX12-NEXT: v_writelane_b32 v21, s47, 16
1421; GFX12-NEXT: v_writelane_b32 v21, s48, 17
1422; GFX12-NEXT: v_writelane_b32 v21, s49, 18
1423; GFX12-NEXT: v_writelane_b32 v21, s50, 19
1424; GFX12-NEXT: v_writelane_b32 v21, s51, 20
1425; GFX12-NEXT: v_writelane_b32 v21, s52, 21
1426; GFX12-NEXT: v_writelane_b32 v21, s53, 22
1427; GFX12-NEXT: v_writelane_b32 v21, s54, 23
1428; GFX12-NEXT: v_writelane_b32 v21, s55, 24
1429; GFX12-NEXT: v_writelane_b32 v21, s56, 25
1430; GFX12-NEXT: v_writelane_b32 v21, s57, 26
1431; GFX12-NEXT: v_writelane_b32 v21, s58, 27
1432; GFX12-NEXT: ;;#ASMSTART
1433; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
1434; GFX12-NEXT: ;;#ASMEND
1435; GFX12-NEXT: v_writelane_b32 v21, s59, 28
1436; GFX12-NEXT: s_and_b32 s59, 0, exec_lo
1437; GFX12-NEXT: s_mov_b32 s59, s32
1438; GFX12-NEXT: ;;#ASMSTART
1439; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
1440; GFX12-NEXT: ;;#ASMEND
1441; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1442; GFX12-NEXT: v_readlane_b32 s59, v21, 28
1443; GFX12-NEXT: v_readlane_b32 s58, v21, 27
1444; GFX12-NEXT: v_readlane_b32 s57, v21, 26
1445; GFX12-NEXT: v_readlane_b32 s56, v21, 25
1446; GFX12-NEXT: v_readlane_b32 s55, v21, 24
1447; GFX12-NEXT: v_readlane_b32 s54, v21, 23
1448; GFX12-NEXT: v_readlane_b32 s53, v21, 22
1449; GFX12-NEXT: v_readlane_b32 s52, v21, 21
1450; GFX12-NEXT: v_readlane_b32 s51, v21, 20
1451; GFX12-NEXT: v_readlane_b32 s50, v21, 19
1452; GFX12-NEXT: v_readlane_b32 s49, v21, 18
1453; GFX12-NEXT: v_readlane_b32 s48, v21, 17
1454; GFX12-NEXT: v_readlane_b32 s47, v21, 16
1455; GFX12-NEXT: v_readlane_b32 s46, v21, 15
1456; GFX12-NEXT: v_readlane_b32 s45, v21, 14
1457; GFX12-NEXT: v_readlane_b32 s44, v21, 13
1458; GFX12-NEXT: v_readlane_b32 s43, v21, 12
1459; GFX12-NEXT: v_readlane_b32 s42, v21, 11
1460; GFX12-NEXT: v_readlane_b32 s41, v21, 10
1461; GFX12-NEXT: v_readlane_b32 s40, v21, 9
1462; GFX12-NEXT: v_readlane_b32 s39, v21, 8
1463; GFX12-NEXT: v_readlane_b32 s38, v21, 7
1464; GFX12-NEXT: v_readlane_b32 s37, v21, 6
1465; GFX12-NEXT: v_readlane_b32 s36, v21, 5
1466; GFX12-NEXT: v_readlane_b32 s35, v21, 4
1467; GFX12-NEXT: v_readlane_b32 s34, v21, 3
1468; GFX12-NEXT: v_readlane_b32 s33, v21, 2
1469; GFX12-NEXT: v_readlane_b32 s31, v21, 1
1470; GFX12-NEXT: v_readlane_b32 s30, v21, 0
1471; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
1472; GFX12-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload
Carl Ritson86627142024-09-04 12:15:20 +09001473; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +04001474; GFX12-NEXT: s_mov_b32 exec_lo, s0
1475; GFX12-NEXT: s_wait_loadcnt 0x0
1476; GFX12-NEXT: s_setpc_b64 s[30:31]
1477 %alloca0 = alloca [4096 x i32], align 16, addrspace(5)
1478
1479 ; Force no SGPRs to be available for the carry-out of the vector add.
1480 %asm = call %asm.output2 asm sideeffect
1481 "; def $0, $1, $2, $3, $4, $5, $6, $7, $8",
1482 "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:20]},={vcc}"()
1483
1484 %s0 = extractvalue %asm.output2 %asm, 0
1485 %s1 = extractvalue %asm.output2 %asm, 1
1486 %s2 = extractvalue %asm.output2 %asm, 2
1487 %s3 = extractvalue %asm.output2 %asm, 3
1488 %s4 = extractvalue %asm.output2 %asm, 4
1489 %s5 = extractvalue %asm.output2 %asm, 5
1490
1491 %v0 = extractvalue %asm.output2 %asm, 6
1492 %v1 = extractvalue %asm.output2 %asm, 7
1493
1494 %vcc = extractvalue %asm.output2 %asm, 8
1495
1496 ; scc is unavailable since it is live into the inline asm use below
1497 call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
1498 "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s59},{scc}"(
1499 <16 x i32> %s0,
1500 <16 x i32> %s1,
1501 <16 x i32> %s2,
1502 <8 x i32> %s3,
1503 <2 x i32> %s4,
1504 i32 %s5,
1505 <16 x i32> %v0,
1506 <5 x i32> %v1,
1507 i64 %vcc,
1508 ptr addrspace(5) %alloca0,
1509 i32 0) ; use of scc
1510
1511 ret void
1512}
1513
1514; This case isn't using SGPRs yet.
1515; FIXME: Should also use one more VGPR, but currently fails to allocate on gfx8.
1516define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset() #0 {
1517; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
1518; GFX7: ; %bb.0:
1519; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1520; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001521; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
Matt Arsenaultef676642024-07-31 23:10:15 +04001522; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001523; GFX7-NEXT: s_add_i32 s6, s32, 0x201100
Matt Arsenaultef676642024-07-31 23:10:15 +04001524; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
1525; GFX7-NEXT: s_mov_b64 exec, s[4:5]
1526; GFX7-NEXT: v_writelane_b32 v23, s28, 28
1527; GFX7-NEXT: v_writelane_b32 v23, s29, 29
1528; GFX7-NEXT: v_writelane_b32 v23, s30, 0
1529; GFX7-NEXT: v_writelane_b32 v23, s31, 1
1530; GFX7-NEXT: v_writelane_b32 v23, s33, 2
1531; GFX7-NEXT: v_writelane_b32 v23, s34, 3
1532; GFX7-NEXT: v_writelane_b32 v23, s35, 4
1533; GFX7-NEXT: v_writelane_b32 v23, s36, 5
1534; GFX7-NEXT: v_writelane_b32 v23, s37, 6
1535; GFX7-NEXT: v_writelane_b32 v23, s38, 7
1536; GFX7-NEXT: v_writelane_b32 v23, s39, 8
1537; GFX7-NEXT: v_writelane_b32 v23, s40, 9
1538; GFX7-NEXT: v_writelane_b32 v23, s41, 10
1539; GFX7-NEXT: v_writelane_b32 v23, s42, 11
1540; GFX7-NEXT: v_writelane_b32 v23, s43, 12
1541; GFX7-NEXT: v_writelane_b32 v23, s44, 13
1542; GFX7-NEXT: v_writelane_b32 v23, s45, 14
1543; GFX7-NEXT: v_writelane_b32 v23, s46, 15
1544; GFX7-NEXT: v_writelane_b32 v23, s47, 16
1545; GFX7-NEXT: v_writelane_b32 v23, s48, 17
1546; GFX7-NEXT: v_writelane_b32 v23, s49, 18
1547; GFX7-NEXT: v_writelane_b32 v23, s50, 19
1548; GFX7-NEXT: v_writelane_b32 v23, s51, 20
1549; GFX7-NEXT: v_writelane_b32 v23, s52, 21
1550; GFX7-NEXT: v_writelane_b32 v23, s53, 22
1551; GFX7-NEXT: v_writelane_b32 v23, s54, 23
1552; GFX7-NEXT: v_writelane_b32 v23, s55, 24
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001553; GFX7-NEXT: s_lshr_b32 s5, s32, 6
Matt Arsenaultef676642024-07-31 23:10:15 +04001554; GFX7-NEXT: v_writelane_b32 v23, s56, 25
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001555; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
1556; GFX7-NEXT: s_add_i32 s4, s5, 0x4240
1557; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
Matt Arsenaultef676642024-07-31 23:10:15 +04001558; GFX7-NEXT: v_writelane_b32 v23, s57, 26
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001559; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
1560; GFX7-NEXT: v_writelane_b32 v22, s4, 0
1561; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
1562; GFX7-NEXT: v_writelane_b32 v23, s59, 27
Matt Arsenaultef676642024-07-31 23:10:15 +04001563; GFX7-NEXT: ;;#ASMSTART
1564; GFX7-NEXT: ; use alloca0 v0
1565; GFX7-NEXT: ;;#ASMEND
1566; GFX7-NEXT: ;;#ASMSTART
1567; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
1568; GFX7-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001569; GFX7-NEXT: v_readlane_b32 s59, v22, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04001570; GFX7-NEXT: ;;#ASMSTART
1571; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
1572; GFX7-NEXT: ;;#ASMEND
1573; GFX7-NEXT: v_readlane_b32 s59, v23, 27
1574; GFX7-NEXT: v_readlane_b32 s57, v23, 26
1575; GFX7-NEXT: v_readlane_b32 s56, v23, 25
1576; GFX7-NEXT: v_readlane_b32 s55, v23, 24
1577; GFX7-NEXT: v_readlane_b32 s54, v23, 23
1578; GFX7-NEXT: v_readlane_b32 s53, v23, 22
1579; GFX7-NEXT: v_readlane_b32 s52, v23, 21
1580; GFX7-NEXT: v_readlane_b32 s51, v23, 20
1581; GFX7-NEXT: v_readlane_b32 s50, v23, 19
1582; GFX7-NEXT: v_readlane_b32 s49, v23, 18
1583; GFX7-NEXT: v_readlane_b32 s48, v23, 17
1584; GFX7-NEXT: v_readlane_b32 s47, v23, 16
1585; GFX7-NEXT: v_readlane_b32 s46, v23, 15
1586; GFX7-NEXT: v_readlane_b32 s45, v23, 14
1587; GFX7-NEXT: v_readlane_b32 s44, v23, 13
1588; GFX7-NEXT: v_readlane_b32 s43, v23, 12
1589; GFX7-NEXT: v_readlane_b32 s42, v23, 11
1590; GFX7-NEXT: v_readlane_b32 s41, v23, 10
1591; GFX7-NEXT: v_readlane_b32 s40, v23, 9
1592; GFX7-NEXT: v_readlane_b32 s39, v23, 8
1593; GFX7-NEXT: v_readlane_b32 s38, v23, 7
1594; GFX7-NEXT: v_readlane_b32 s37, v23, 6
1595; GFX7-NEXT: v_readlane_b32 s36, v23, 5
1596; GFX7-NEXT: v_readlane_b32 s35, v23, 4
1597; GFX7-NEXT: v_readlane_b32 s34, v23, 3
1598; GFX7-NEXT: v_readlane_b32 s33, v23, 2
1599; GFX7-NEXT: v_readlane_b32 s31, v23, 1
1600; GFX7-NEXT: v_readlane_b32 s30, v23, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04001601; GFX7-NEXT: v_readlane_b32 s28, v23, 28
1602; GFX7-NEXT: v_readlane_b32 s29, v23, 29
1603; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001604; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
Matt Arsenaultef676642024-07-31 23:10:15 +04001605; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001606; GFX7-NEXT: s_add_i32 s6, s32, 0x201100
Matt Arsenaultef676642024-07-31 23:10:15 +04001607; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
1608; GFX7-NEXT: s_mov_b64 exec, s[4:5]
1609; GFX7-NEXT: s_waitcnt vmcnt(0)
1610; GFX7-NEXT: s_setpc_b64 s[30:31]
1611;
1612; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
1613; GFX8: ; %bb.0:
1614; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1615; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001616; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
Matt Arsenaultef676642024-07-31 23:10:15 +04001617; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
1618; GFX8-NEXT: s_mov_b64 exec, s[4:5]
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001619; GFX8-NEXT: v_writelane_b32 v22, s30, 0
1620; GFX8-NEXT: v_writelane_b32 v22, s31, 1
1621; GFX8-NEXT: v_writelane_b32 v22, s33, 2
1622; GFX8-NEXT: v_writelane_b32 v22, s34, 3
1623; GFX8-NEXT: v_writelane_b32 v22, s35, 4
1624; GFX8-NEXT: v_writelane_b32 v22, s36, 5
1625; GFX8-NEXT: v_writelane_b32 v22, s37, 6
1626; GFX8-NEXT: v_writelane_b32 v22, s38, 7
1627; GFX8-NEXT: v_writelane_b32 v22, s39, 8
1628; GFX8-NEXT: v_writelane_b32 v22, s40, 9
1629; GFX8-NEXT: v_writelane_b32 v22, s41, 10
1630; GFX8-NEXT: v_writelane_b32 v22, s42, 11
1631; GFX8-NEXT: v_writelane_b32 v22, s43, 12
1632; GFX8-NEXT: v_writelane_b32 v22, s44, 13
1633; GFX8-NEXT: v_writelane_b32 v22, s45, 14
1634; GFX8-NEXT: v_writelane_b32 v22, s46, 15
1635; GFX8-NEXT: v_writelane_b32 v22, s47, 16
1636; GFX8-NEXT: v_writelane_b32 v22, s48, 17
1637; GFX8-NEXT: v_writelane_b32 v22, s49, 18
1638; GFX8-NEXT: v_writelane_b32 v22, s50, 19
1639; GFX8-NEXT: v_writelane_b32 v22, s51, 20
1640; GFX8-NEXT: v_writelane_b32 v22, s52, 21
1641; GFX8-NEXT: v_writelane_b32 v22, s53, 22
1642; GFX8-NEXT: v_writelane_b32 v22, s54, 23
1643; GFX8-NEXT: v_writelane_b32 v22, s55, 24
1644; GFX8-NEXT: v_writelane_b32 v22, s56, 25
1645; GFX8-NEXT: v_writelane_b32 v22, s57, 26
1646; GFX8-NEXT: s_lshr_b32 s4, s32, 6
1647; GFX8-NEXT: v_writelane_b32 v22, s59, 27
Matt Arsenaultef676642024-07-31 23:10:15 +04001648; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001649; GFX8-NEXT: s_add_i32 s59, s4, 0x4240
Matt Arsenaultef676642024-07-31 23:10:15 +04001650; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001651; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
Matt Arsenaultef676642024-07-31 23:10:15 +04001652; GFX8-NEXT: ;;#ASMSTART
1653; GFX8-NEXT: ; use alloca0 v0
1654; GFX8-NEXT: ;;#ASMEND
1655; GFX8-NEXT: ;;#ASMSTART
1656; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
1657; GFX8-NEXT: ;;#ASMEND
Matt Arsenaultef676642024-07-31 23:10:15 +04001658; GFX8-NEXT: ;;#ASMSTART
1659; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
1660; GFX8-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001661; GFX8-NEXT: v_readlane_b32 s59, v22, 27
1662; GFX8-NEXT: v_readlane_b32 s57, v22, 26
1663; GFX8-NEXT: v_readlane_b32 s56, v22, 25
1664; GFX8-NEXT: v_readlane_b32 s55, v22, 24
1665; GFX8-NEXT: v_readlane_b32 s54, v22, 23
1666; GFX8-NEXT: v_readlane_b32 s53, v22, 22
1667; GFX8-NEXT: v_readlane_b32 s52, v22, 21
1668; GFX8-NEXT: v_readlane_b32 s51, v22, 20
1669; GFX8-NEXT: v_readlane_b32 s50, v22, 19
1670; GFX8-NEXT: v_readlane_b32 s49, v22, 18
1671; GFX8-NEXT: v_readlane_b32 s48, v22, 17
1672; GFX8-NEXT: v_readlane_b32 s47, v22, 16
1673; GFX8-NEXT: v_readlane_b32 s46, v22, 15
1674; GFX8-NEXT: v_readlane_b32 s45, v22, 14
1675; GFX8-NEXT: v_readlane_b32 s44, v22, 13
1676; GFX8-NEXT: v_readlane_b32 s43, v22, 12
1677; GFX8-NEXT: v_readlane_b32 s42, v22, 11
1678; GFX8-NEXT: v_readlane_b32 s41, v22, 10
1679; GFX8-NEXT: v_readlane_b32 s40, v22, 9
1680; GFX8-NEXT: v_readlane_b32 s39, v22, 8
1681; GFX8-NEXT: v_readlane_b32 s38, v22, 7
1682; GFX8-NEXT: v_readlane_b32 s37, v22, 6
1683; GFX8-NEXT: v_readlane_b32 s36, v22, 5
1684; GFX8-NEXT: v_readlane_b32 s35, v22, 4
1685; GFX8-NEXT: v_readlane_b32 s34, v22, 3
1686; GFX8-NEXT: v_readlane_b32 s33, v22, 2
1687; GFX8-NEXT: v_readlane_b32 s31, v22, 1
1688; GFX8-NEXT: v_readlane_b32 s30, v22, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04001689; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001690; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
Matt Arsenaultef676642024-07-31 23:10:15 +04001691; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
1692; GFX8-NEXT: s_mov_b64 exec, s[4:5]
1693; GFX8-NEXT: s_waitcnt vmcnt(0)
1694; GFX8-NEXT: s_setpc_b64 s[30:31]
1695;
1696; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
1697; GFX900: ; %bb.0:
1698; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1699; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
1700; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
1701; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
1702; GFX900-NEXT: s_mov_b64 exec, s[4:5]
1703; GFX900-NEXT: v_writelane_b32 v22, s30, 0
1704; GFX900-NEXT: v_writelane_b32 v22, s31, 1
1705; GFX900-NEXT: v_writelane_b32 v22, s33, 2
1706; GFX900-NEXT: v_writelane_b32 v22, s34, 3
1707; GFX900-NEXT: v_writelane_b32 v22, s35, 4
1708; GFX900-NEXT: v_writelane_b32 v22, s36, 5
1709; GFX900-NEXT: v_writelane_b32 v22, s37, 6
1710; GFX900-NEXT: v_writelane_b32 v22, s38, 7
1711; GFX900-NEXT: v_writelane_b32 v22, s39, 8
1712; GFX900-NEXT: v_writelane_b32 v22, s40, 9
1713; GFX900-NEXT: v_writelane_b32 v22, s41, 10
1714; GFX900-NEXT: v_writelane_b32 v22, s42, 11
1715; GFX900-NEXT: v_writelane_b32 v22, s43, 12
1716; GFX900-NEXT: v_writelane_b32 v22, s44, 13
1717; GFX900-NEXT: v_writelane_b32 v22, s45, 14
1718; GFX900-NEXT: v_writelane_b32 v22, s46, 15
1719; GFX900-NEXT: v_writelane_b32 v22, s47, 16
1720; GFX900-NEXT: v_writelane_b32 v22, s48, 17
1721; GFX900-NEXT: v_writelane_b32 v22, s49, 18
1722; GFX900-NEXT: v_writelane_b32 v22, s50, 19
1723; GFX900-NEXT: v_writelane_b32 v22, s51, 20
1724; GFX900-NEXT: v_writelane_b32 v22, s52, 21
1725; GFX900-NEXT: v_writelane_b32 v22, s53, 22
Matt Arsenaultef676642024-07-31 23:10:15 +04001726; GFX900-NEXT: v_writelane_b32 v22, s54, 23
Matt Arsenaultef676642024-07-31 23:10:15 +04001727; GFX900-NEXT: v_writelane_b32 v22, s55, 24
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001728; GFX900-NEXT: v_writelane_b32 v22, s56, 25
1729; GFX900-NEXT: v_writelane_b32 v22, s57, 26
1730; GFX900-NEXT: s_lshr_b32 s4, s32, 6
1731; GFX900-NEXT: v_writelane_b32 v22, s59, 27
1732; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
1733; GFX900-NEXT: s_add_i32 s59, s4, 0x4240
1734; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
1735; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
Matt Arsenaultef676642024-07-31 23:10:15 +04001736; GFX900-NEXT: ;;#ASMSTART
1737; GFX900-NEXT: ; use alloca0 v0
1738; GFX900-NEXT: ;;#ASMEND
Matt Arsenaultef676642024-07-31 23:10:15 +04001739; GFX900-NEXT: ;;#ASMSTART
1740; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
1741; GFX900-NEXT: ;;#ASMEND
1742; GFX900-NEXT: ;;#ASMSTART
1743; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
1744; GFX900-NEXT: ;;#ASMEND
1745; GFX900-NEXT: v_readlane_b32 s59, v22, 27
1746; GFX900-NEXT: v_readlane_b32 s57, v22, 26
1747; GFX900-NEXT: v_readlane_b32 s56, v22, 25
1748; GFX900-NEXT: v_readlane_b32 s55, v22, 24
1749; GFX900-NEXT: v_readlane_b32 s54, v22, 23
1750; GFX900-NEXT: v_readlane_b32 s53, v22, 22
1751; GFX900-NEXT: v_readlane_b32 s52, v22, 21
1752; GFX900-NEXT: v_readlane_b32 s51, v22, 20
1753; GFX900-NEXT: v_readlane_b32 s50, v22, 19
1754; GFX900-NEXT: v_readlane_b32 s49, v22, 18
1755; GFX900-NEXT: v_readlane_b32 s48, v22, 17
1756; GFX900-NEXT: v_readlane_b32 s47, v22, 16
1757; GFX900-NEXT: v_readlane_b32 s46, v22, 15
1758; GFX900-NEXT: v_readlane_b32 s45, v22, 14
1759; GFX900-NEXT: v_readlane_b32 s44, v22, 13
1760; GFX900-NEXT: v_readlane_b32 s43, v22, 12
1761; GFX900-NEXT: v_readlane_b32 s42, v22, 11
1762; GFX900-NEXT: v_readlane_b32 s41, v22, 10
1763; GFX900-NEXT: v_readlane_b32 s40, v22, 9
1764; GFX900-NEXT: v_readlane_b32 s39, v22, 8
1765; GFX900-NEXT: v_readlane_b32 s38, v22, 7
1766; GFX900-NEXT: v_readlane_b32 s37, v22, 6
1767; GFX900-NEXT: v_readlane_b32 s36, v22, 5
1768; GFX900-NEXT: v_readlane_b32 s35, v22, 4
1769; GFX900-NEXT: v_readlane_b32 s34, v22, 3
1770; GFX900-NEXT: v_readlane_b32 s33, v22, 2
1771; GFX900-NEXT: v_readlane_b32 s31, v22, 1
1772; GFX900-NEXT: v_readlane_b32 s30, v22, 0
1773; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
1774; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
1775; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
1776; GFX900-NEXT: s_mov_b64 exec, s[4:5]
1777; GFX900-NEXT: s_waitcnt vmcnt(0)
1778; GFX900-NEXT: s_setpc_b64 s[30:31]
1779;
Fabian Rittera33a84e2025-02-13 15:17:12 +01001780; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
1781; GFX942: ; %bb.0:
1782; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1783; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
1784; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
1785; GFX942-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill
1786; GFX942-NEXT: s_mov_b64 exec, s[0:1]
1787; GFX942-NEXT: v_writelane_b32 v22, s30, 0
1788; GFX942-NEXT: v_writelane_b32 v22, s31, 1
1789; GFX942-NEXT: v_writelane_b32 v22, s33, 2
1790; GFX942-NEXT: v_writelane_b32 v22, s34, 3
1791; GFX942-NEXT: v_writelane_b32 v22, s35, 4
1792; GFX942-NEXT: v_writelane_b32 v22, s36, 5
1793; GFX942-NEXT: v_writelane_b32 v22, s37, 6
1794; GFX942-NEXT: v_writelane_b32 v22, s38, 7
1795; GFX942-NEXT: v_writelane_b32 v22, s39, 8
1796; GFX942-NEXT: v_writelane_b32 v22, s40, 9
1797; GFX942-NEXT: v_writelane_b32 v22, s41, 10
1798; GFX942-NEXT: v_writelane_b32 v22, s42, 11
1799; GFX942-NEXT: v_writelane_b32 v22, s43, 12
1800; GFX942-NEXT: v_writelane_b32 v22, s44, 13
1801; GFX942-NEXT: v_writelane_b32 v22, s45, 14
1802; GFX942-NEXT: v_writelane_b32 v22, s46, 15
1803; GFX942-NEXT: v_writelane_b32 v22, s47, 16
1804; GFX942-NEXT: v_writelane_b32 v22, s48, 17
1805; GFX942-NEXT: v_writelane_b32 v22, s49, 18
1806; GFX942-NEXT: v_writelane_b32 v22, s50, 19
1807; GFX942-NEXT: v_writelane_b32 v22, s51, 20
1808; GFX942-NEXT: v_writelane_b32 v22, s52, 21
1809; GFX942-NEXT: v_writelane_b32 v22, s53, 22
1810; GFX942-NEXT: v_writelane_b32 v22, s54, 23
1811; GFX942-NEXT: v_writelane_b32 v22, s55, 24
1812; GFX942-NEXT: v_writelane_b32 v22, s56, 25
1813; GFX942-NEXT: v_writelane_b32 v22, s57, 26
1814; GFX942-NEXT: s_add_i32 s0, s32, 64
1815; GFX942-NEXT: v_writelane_b32 v22, s59, 27
1816; GFX942-NEXT: v_mov_b32_e32 v0, s0
1817; GFX942-NEXT: v_writelane_b32 v22, s60, 28
1818; GFX942-NEXT: ;;#ASMSTART
1819; GFX942-NEXT: ; use alloca0 v0
1820; GFX942-NEXT: ;;#ASMEND
1821; GFX942-NEXT: ;;#ASMSTART
1822; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
1823; GFX942-NEXT: ;;#ASMEND
1824; GFX942-NEXT: s_add_i32 s59, s32, 0x4240
1825; GFX942-NEXT: v_writelane_b32 v22, s61, 29
1826; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
1827; GFX942-NEXT: ;;#ASMSTART
1828; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
1829; GFX942-NEXT: ;;#ASMEND
1830; GFX942-NEXT: v_readlane_b32 s61, v22, 29
1831; GFX942-NEXT: v_readlane_b32 s60, v22, 28
1832; GFX942-NEXT: v_readlane_b32 s59, v22, 27
1833; GFX942-NEXT: v_readlane_b32 s57, v22, 26
1834; GFX942-NEXT: v_readlane_b32 s56, v22, 25
1835; GFX942-NEXT: v_readlane_b32 s55, v22, 24
1836; GFX942-NEXT: v_readlane_b32 s54, v22, 23
1837; GFX942-NEXT: v_readlane_b32 s53, v22, 22
1838; GFX942-NEXT: v_readlane_b32 s52, v22, 21
1839; GFX942-NEXT: v_readlane_b32 s51, v22, 20
1840; GFX942-NEXT: v_readlane_b32 s50, v22, 19
1841; GFX942-NEXT: v_readlane_b32 s49, v22, 18
1842; GFX942-NEXT: v_readlane_b32 s48, v22, 17
1843; GFX942-NEXT: v_readlane_b32 s47, v22, 16
1844; GFX942-NEXT: v_readlane_b32 s46, v22, 15
1845; GFX942-NEXT: v_readlane_b32 s45, v22, 14
1846; GFX942-NEXT: v_readlane_b32 s44, v22, 13
1847; GFX942-NEXT: v_readlane_b32 s43, v22, 12
1848; GFX942-NEXT: v_readlane_b32 s42, v22, 11
1849; GFX942-NEXT: v_readlane_b32 s41, v22, 10
1850; GFX942-NEXT: v_readlane_b32 s40, v22, 9
1851; GFX942-NEXT: v_readlane_b32 s39, v22, 8
1852; GFX942-NEXT: v_readlane_b32 s38, v22, 7
1853; GFX942-NEXT: v_readlane_b32 s37, v22, 6
1854; GFX942-NEXT: v_readlane_b32 s36, v22, 5
1855; GFX942-NEXT: v_readlane_b32 s35, v22, 4
1856; GFX942-NEXT: v_readlane_b32 s34, v22, 3
1857; GFX942-NEXT: v_readlane_b32 s33, v22, 2
1858; GFX942-NEXT: v_readlane_b32 s31, v22, 1
1859; GFX942-NEXT: v_readlane_b32 s30, v22, 0
1860; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
1861; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
1862; GFX942-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload
1863; GFX942-NEXT: s_mov_b64 exec, s[0:1]
1864; GFX942-NEXT: s_waitcnt vmcnt(0)
1865; GFX942-NEXT: s_setpc_b64 s[30:31]
Matt Arsenaultef676642024-07-31 23:10:15 +04001866;
1867; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
1868; GFX10_1: ; %bb.0:
1869; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1870; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
1871; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001872; GFX10_1-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
Matt Arsenaultef676642024-07-31 23:10:15 +04001873; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
1874; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001875; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0
Matt Arsenaultef91cd32024-10-19 12:33:03 -07001876; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001877; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5
1878; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1
Matt Arsenaultef91cd32024-10-19 12:33:03 -07001879; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
Matt Arsenaultef676642024-07-31 23:10:15 +04001880; GFX10_1-NEXT: ;;#ASMSTART
Matt Arsenaultef91cd32024-10-19 12:33:03 -07001881; GFX10_1-NEXT: ; use alloca0 v0
Matt Arsenaultef676642024-07-31 23:10:15 +04001882; GFX10_1-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001883; GFX10_1-NEXT: v_writelane_b32 v22, s33, 2
1884; GFX10_1-NEXT: v_writelane_b32 v22, s34, 3
1885; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4
1886; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5
1887; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6
1888; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7
1889; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8
1890; GFX10_1-NEXT: v_writelane_b32 v22, s40, 9
1891; GFX10_1-NEXT: v_writelane_b32 v22, s41, 10
1892; GFX10_1-NEXT: v_writelane_b32 v22, s42, 11
1893; GFX10_1-NEXT: v_writelane_b32 v22, s43, 12
1894; GFX10_1-NEXT: v_writelane_b32 v22, s44, 13
1895; GFX10_1-NEXT: v_writelane_b32 v22, s45, 14
1896; GFX10_1-NEXT: v_writelane_b32 v22, s46, 15
1897; GFX10_1-NEXT: v_writelane_b32 v22, s47, 16
1898; GFX10_1-NEXT: v_writelane_b32 v22, s48, 17
1899; GFX10_1-NEXT: v_writelane_b32 v22, s49, 18
1900; GFX10_1-NEXT: v_writelane_b32 v22, s50, 19
1901; GFX10_1-NEXT: v_writelane_b32 v22, s51, 20
1902; GFX10_1-NEXT: v_writelane_b32 v22, s52, 21
1903; GFX10_1-NEXT: v_writelane_b32 v22, s53, 22
1904; GFX10_1-NEXT: v_writelane_b32 v22, s54, 23
1905; GFX10_1-NEXT: v_writelane_b32 v22, s55, 24
1906; GFX10_1-NEXT: v_writelane_b32 v22, s56, 25
1907; GFX10_1-NEXT: v_writelane_b32 v22, s57, 26
1908; GFX10_1-NEXT: v_writelane_b32 v22, s59, 27
1909; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240
1910; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
Matt Arsenaultef676642024-07-31 23:10:15 +04001911; GFX10_1-NEXT: ;;#ASMSTART
1912; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
1913; GFX10_1-NEXT: ;;#ASMEND
Matt Arsenaultef676642024-07-31 23:10:15 +04001914; GFX10_1-NEXT: ;;#ASMSTART
1915; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
1916; GFX10_1-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001917; GFX10_1-NEXT: v_readlane_b32 s59, v22, 27
1918; GFX10_1-NEXT: v_readlane_b32 s57, v22, 26
1919; GFX10_1-NEXT: v_readlane_b32 s56, v22, 25
1920; GFX10_1-NEXT: v_readlane_b32 s55, v22, 24
1921; GFX10_1-NEXT: v_readlane_b32 s54, v22, 23
1922; GFX10_1-NEXT: v_readlane_b32 s53, v22, 22
1923; GFX10_1-NEXT: v_readlane_b32 s52, v22, 21
1924; GFX10_1-NEXT: v_readlane_b32 s51, v22, 20
1925; GFX10_1-NEXT: v_readlane_b32 s50, v22, 19
1926; GFX10_1-NEXT: v_readlane_b32 s49, v22, 18
1927; GFX10_1-NEXT: v_readlane_b32 s48, v22, 17
1928; GFX10_1-NEXT: v_readlane_b32 s47, v22, 16
1929; GFX10_1-NEXT: v_readlane_b32 s46, v22, 15
1930; GFX10_1-NEXT: v_readlane_b32 s45, v22, 14
1931; GFX10_1-NEXT: v_readlane_b32 s44, v22, 13
1932; GFX10_1-NEXT: v_readlane_b32 s43, v22, 12
1933; GFX10_1-NEXT: v_readlane_b32 s42, v22, 11
1934; GFX10_1-NEXT: v_readlane_b32 s41, v22, 10
1935; GFX10_1-NEXT: v_readlane_b32 s40, v22, 9
1936; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8
1937; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7
1938; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6
1939; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5
1940; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4
1941; GFX10_1-NEXT: v_readlane_b32 s34, v22, 3
1942; GFX10_1-NEXT: v_readlane_b32 s33, v22, 2
1943; GFX10_1-NEXT: v_readlane_b32 s31, v22, 1
1944; GFX10_1-NEXT: v_readlane_b32 s30, v22, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04001945; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
1946; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001947; GFX10_1-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
Matt Arsenaultef676642024-07-31 23:10:15 +04001948; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
1949; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
1950; GFX10_1-NEXT: s_waitcnt vmcnt(0)
1951; GFX10_1-NEXT: s_setpc_b64 s[30:31]
1952;
1953; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
1954; GFX10_3: ; %bb.0:
1955; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1956; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
1957; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001958; GFX10_3-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
Matt Arsenaultef676642024-07-31 23:10:15 +04001959; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001960; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0
Matt Arsenaultef91cd32024-10-19 12:33:03 -07001961; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001962; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5
1963; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1
Matt Arsenaultef91cd32024-10-19 12:33:03 -07001964; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
Matt Arsenaultef676642024-07-31 23:10:15 +04001965; GFX10_3-NEXT: ;;#ASMSTART
Matt Arsenaultef91cd32024-10-19 12:33:03 -07001966; GFX10_3-NEXT: ; use alloca0 v0
Matt Arsenaultef676642024-07-31 23:10:15 +04001967; GFX10_3-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08001968; GFX10_3-NEXT: v_writelane_b32 v22, s33, 2
1969; GFX10_3-NEXT: v_writelane_b32 v22, s34, 3
1970; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4
1971; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5
1972; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6
1973; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7
1974; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8
1975; GFX10_3-NEXT: v_writelane_b32 v22, s40, 9
1976; GFX10_3-NEXT: v_writelane_b32 v22, s41, 10
1977; GFX10_3-NEXT: v_writelane_b32 v22, s42, 11
1978; GFX10_3-NEXT: v_writelane_b32 v22, s43, 12
1979; GFX10_3-NEXT: v_writelane_b32 v22, s44, 13
1980; GFX10_3-NEXT: v_writelane_b32 v22, s45, 14
1981; GFX10_3-NEXT: v_writelane_b32 v22, s46, 15
1982; GFX10_3-NEXT: v_writelane_b32 v22, s47, 16
1983; GFX10_3-NEXT: v_writelane_b32 v22, s48, 17
1984; GFX10_3-NEXT: v_writelane_b32 v22, s49, 18
1985; GFX10_3-NEXT: v_writelane_b32 v22, s50, 19
1986; GFX10_3-NEXT: v_writelane_b32 v22, s51, 20
1987; GFX10_3-NEXT: v_writelane_b32 v22, s52, 21
1988; GFX10_3-NEXT: v_writelane_b32 v22, s53, 22
1989; GFX10_3-NEXT: v_writelane_b32 v22, s54, 23
1990; GFX10_3-NEXT: v_writelane_b32 v22, s55, 24
1991; GFX10_3-NEXT: v_writelane_b32 v22, s56, 25
1992; GFX10_3-NEXT: v_writelane_b32 v22, s57, 26
1993; GFX10_3-NEXT: v_writelane_b32 v22, s59, 27
1994; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240
1995; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
Matt Arsenaultef676642024-07-31 23:10:15 +04001996; GFX10_3-NEXT: ;;#ASMSTART
1997; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
1998; GFX10_3-NEXT: ;;#ASMEND
Matt Arsenaultef676642024-07-31 23:10:15 +04001999; GFX10_3-NEXT: ;;#ASMSTART
2000; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
2001; GFX10_3-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002002; GFX10_3-NEXT: v_readlane_b32 s59, v22, 27
2003; GFX10_3-NEXT: v_readlane_b32 s57, v22, 26
2004; GFX10_3-NEXT: v_readlane_b32 s56, v22, 25
2005; GFX10_3-NEXT: v_readlane_b32 s55, v22, 24
2006; GFX10_3-NEXT: v_readlane_b32 s54, v22, 23
2007; GFX10_3-NEXT: v_readlane_b32 s53, v22, 22
2008; GFX10_3-NEXT: v_readlane_b32 s52, v22, 21
2009; GFX10_3-NEXT: v_readlane_b32 s51, v22, 20
2010; GFX10_3-NEXT: v_readlane_b32 s50, v22, 19
2011; GFX10_3-NEXT: v_readlane_b32 s49, v22, 18
2012; GFX10_3-NEXT: v_readlane_b32 s48, v22, 17
2013; GFX10_3-NEXT: v_readlane_b32 s47, v22, 16
2014; GFX10_3-NEXT: v_readlane_b32 s46, v22, 15
2015; GFX10_3-NEXT: v_readlane_b32 s45, v22, 14
2016; GFX10_3-NEXT: v_readlane_b32 s44, v22, 13
2017; GFX10_3-NEXT: v_readlane_b32 s43, v22, 12
2018; GFX10_3-NEXT: v_readlane_b32 s42, v22, 11
2019; GFX10_3-NEXT: v_readlane_b32 s41, v22, 10
2020; GFX10_3-NEXT: v_readlane_b32 s40, v22, 9
2021; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8
2022; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7
2023; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6
2024; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5
2025; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4
2026; GFX10_3-NEXT: v_readlane_b32 s34, v22, 3
2027; GFX10_3-NEXT: v_readlane_b32 s33, v22, 2
2028; GFX10_3-NEXT: v_readlane_b32 s31, v22, 1
2029; GFX10_3-NEXT: v_readlane_b32 s30, v22, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04002030; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
2031; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002032; GFX10_3-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
Matt Arsenaultef676642024-07-31 23:10:15 +04002033; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
2034; GFX10_3-NEXT: s_waitcnt vmcnt(0)
2035; GFX10_3-NEXT: s_setpc_b64 s[30:31]
2036;
2037; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
2038; GFX11: ; %bb.0:
2039; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2040; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
2041; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002042; GFX11-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill
Matt Arsenaultef676642024-07-31 23:10:15 +04002043; GFX11-NEXT: s_mov_b32 exec_lo, s0
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002044; GFX11-NEXT: v_writelane_b32 v22, s30, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04002045; GFX11-NEXT: s_add_i32 s0, s32, 64
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002046; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2047; GFX11-NEXT: v_mov_b32_e32 v0, s0
Matt Arsenaultef676642024-07-31 23:10:15 +04002048; GFX11-NEXT: ;;#ASMSTART
Matt Arsenaultef91cd32024-10-19 12:33:03 -07002049; GFX11-NEXT: ; use alloca0 v0
Matt Arsenaultef676642024-07-31 23:10:15 +04002050; GFX11-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002051; GFX11-NEXT: v_writelane_b32 v22, s31, 1
2052; GFX11-NEXT: v_writelane_b32 v22, s33, 2
2053; GFX11-NEXT: v_writelane_b32 v22, s34, 3
2054; GFX11-NEXT: v_writelane_b32 v22, s35, 4
2055; GFX11-NEXT: v_writelane_b32 v22, s36, 5
2056; GFX11-NEXT: v_writelane_b32 v22, s37, 6
2057; GFX11-NEXT: v_writelane_b32 v22, s38, 7
2058; GFX11-NEXT: v_writelane_b32 v22, s39, 8
2059; GFX11-NEXT: v_writelane_b32 v22, s40, 9
2060; GFX11-NEXT: v_writelane_b32 v22, s41, 10
2061; GFX11-NEXT: v_writelane_b32 v22, s42, 11
2062; GFX11-NEXT: v_writelane_b32 v22, s43, 12
2063; GFX11-NEXT: v_writelane_b32 v22, s44, 13
2064; GFX11-NEXT: v_writelane_b32 v22, s45, 14
2065; GFX11-NEXT: v_writelane_b32 v22, s46, 15
2066; GFX11-NEXT: v_writelane_b32 v22, s47, 16
2067; GFX11-NEXT: v_writelane_b32 v22, s48, 17
2068; GFX11-NEXT: v_writelane_b32 v22, s49, 18
2069; GFX11-NEXT: v_writelane_b32 v22, s50, 19
2070; GFX11-NEXT: v_writelane_b32 v22, s51, 20
2071; GFX11-NEXT: v_writelane_b32 v22, s52, 21
2072; GFX11-NEXT: v_writelane_b32 v22, s53, 22
2073; GFX11-NEXT: v_writelane_b32 v22, s54, 23
2074; GFX11-NEXT: v_writelane_b32 v22, s55, 24
2075; GFX11-NEXT: v_writelane_b32 v22, s56, 25
2076; GFX11-NEXT: v_writelane_b32 v22, s57, 26
2077; GFX11-NEXT: v_writelane_b32 v22, s59, 27
2078; GFX11-NEXT: s_add_i32 s59, s32, 0x4240
2079; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
Matt Arsenaultef676642024-07-31 23:10:15 +04002080; GFX11-NEXT: ;;#ASMSTART
2081; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
2082; GFX11-NEXT: ;;#ASMEND
Matt Arsenaultef676642024-07-31 23:10:15 +04002083; GFX11-NEXT: ;;#ASMSTART
2084; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
2085; GFX11-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002086; GFX11-NEXT: v_readlane_b32 s59, v22, 27
2087; GFX11-NEXT: v_readlane_b32 s57, v22, 26
2088; GFX11-NEXT: v_readlane_b32 s56, v22, 25
2089; GFX11-NEXT: v_readlane_b32 s55, v22, 24
2090; GFX11-NEXT: v_readlane_b32 s54, v22, 23
2091; GFX11-NEXT: v_readlane_b32 s53, v22, 22
2092; GFX11-NEXT: v_readlane_b32 s52, v22, 21
2093; GFX11-NEXT: v_readlane_b32 s51, v22, 20
2094; GFX11-NEXT: v_readlane_b32 s50, v22, 19
2095; GFX11-NEXT: v_readlane_b32 s49, v22, 18
2096; GFX11-NEXT: v_readlane_b32 s48, v22, 17
2097; GFX11-NEXT: v_readlane_b32 s47, v22, 16
2098; GFX11-NEXT: v_readlane_b32 s46, v22, 15
2099; GFX11-NEXT: v_readlane_b32 s45, v22, 14
2100; GFX11-NEXT: v_readlane_b32 s44, v22, 13
2101; GFX11-NEXT: v_readlane_b32 s43, v22, 12
2102; GFX11-NEXT: v_readlane_b32 s42, v22, 11
2103; GFX11-NEXT: v_readlane_b32 s41, v22, 10
2104; GFX11-NEXT: v_readlane_b32 s40, v22, 9
2105; GFX11-NEXT: v_readlane_b32 s39, v22, 8
2106; GFX11-NEXT: v_readlane_b32 s38, v22, 7
2107; GFX11-NEXT: v_readlane_b32 s37, v22, 6
2108; GFX11-NEXT: v_readlane_b32 s36, v22, 5
2109; GFX11-NEXT: v_readlane_b32 s35, v22, 4
2110; GFX11-NEXT: v_readlane_b32 s34, v22, 3
2111; GFX11-NEXT: v_readlane_b32 s33, v22, 2
2112; GFX11-NEXT: v_readlane_b32 s31, v22, 1
2113; GFX11-NEXT: v_readlane_b32 s30, v22, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04002114; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
2115; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002116; GFX11-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload
Matt Arsenaultef676642024-07-31 23:10:15 +04002117; GFX11-NEXT: s_mov_b32 exec_lo, s0
2118; GFX11-NEXT: s_waitcnt vmcnt(0)
2119; GFX11-NEXT: s_setpc_b64 s[30:31]
2120;
2121; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
2122; GFX12: ; %bb.0:
2123; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2124; GFX12-NEXT: s_wait_expcnt 0x0
2125; GFX12-NEXT: s_wait_samplecnt 0x0
2126; GFX12-NEXT: s_wait_bvhcnt 0x0
2127; GFX12-NEXT: s_wait_kmcnt 0x0
2128; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002129; GFX12-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill
Carl Ritson86627142024-09-04 12:15:20 +09002130; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +04002131; GFX12-NEXT: s_mov_b32 exec_lo, s0
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002132; GFX12-NEXT: v_writelane_b32 v22, s30, 0
2133; GFX12-NEXT: v_mov_b32_e32 v0, s32
Matt Arsenaultef676642024-07-31 23:10:15 +04002134; GFX12-NEXT: ;;#ASMSTART
Matt Arsenaultef91cd32024-10-19 12:33:03 -07002135; GFX12-NEXT: ; use alloca0 v0
Matt Arsenaultef676642024-07-31 23:10:15 +04002136; GFX12-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002137; GFX12-NEXT: v_writelane_b32 v22, s31, 1
2138; GFX12-NEXT: v_writelane_b32 v22, s33, 2
2139; GFX12-NEXT: v_writelane_b32 v22, s34, 3
2140; GFX12-NEXT: v_writelane_b32 v22, s35, 4
2141; GFX12-NEXT: v_writelane_b32 v22, s36, 5
2142; GFX12-NEXT: v_writelane_b32 v22, s37, 6
2143; GFX12-NEXT: v_writelane_b32 v22, s38, 7
2144; GFX12-NEXT: v_writelane_b32 v22, s39, 8
2145; GFX12-NEXT: v_writelane_b32 v22, s40, 9
2146; GFX12-NEXT: v_writelane_b32 v22, s41, 10
2147; GFX12-NEXT: v_writelane_b32 v22, s42, 11
2148; GFX12-NEXT: v_writelane_b32 v22, s43, 12
2149; GFX12-NEXT: v_writelane_b32 v22, s44, 13
2150; GFX12-NEXT: v_writelane_b32 v22, s45, 14
2151; GFX12-NEXT: v_writelane_b32 v22, s46, 15
2152; GFX12-NEXT: v_writelane_b32 v22, s47, 16
2153; GFX12-NEXT: v_writelane_b32 v22, s48, 17
2154; GFX12-NEXT: v_writelane_b32 v22, s49, 18
2155; GFX12-NEXT: v_writelane_b32 v22, s50, 19
2156; GFX12-NEXT: v_writelane_b32 v22, s51, 20
2157; GFX12-NEXT: v_writelane_b32 v22, s52, 21
2158; GFX12-NEXT: v_writelane_b32 v22, s53, 22
2159; GFX12-NEXT: v_writelane_b32 v22, s54, 23
2160; GFX12-NEXT: v_writelane_b32 v22, s55, 24
2161; GFX12-NEXT: v_writelane_b32 v22, s56, 25
2162; GFX12-NEXT: v_writelane_b32 v22, s57, 26
2163; GFX12-NEXT: v_writelane_b32 v22, s59, 27
2164; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200
2165; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
Matt Arsenaultef676642024-07-31 23:10:15 +04002166; GFX12-NEXT: ;;#ASMSTART
2167; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
2168; GFX12-NEXT: ;;#ASMEND
Matt Arsenaultef676642024-07-31 23:10:15 +04002169; GFX12-NEXT: ;;#ASMSTART
2170; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
2171; GFX12-NEXT: ;;#ASMEND
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002172; GFX12-NEXT: v_readlane_b32 s59, v22, 27
2173; GFX12-NEXT: v_readlane_b32 s57, v22, 26
2174; GFX12-NEXT: v_readlane_b32 s56, v22, 25
2175; GFX12-NEXT: v_readlane_b32 s55, v22, 24
2176; GFX12-NEXT: v_readlane_b32 s54, v22, 23
2177; GFX12-NEXT: v_readlane_b32 s53, v22, 22
2178; GFX12-NEXT: v_readlane_b32 s52, v22, 21
2179; GFX12-NEXT: v_readlane_b32 s51, v22, 20
2180; GFX12-NEXT: v_readlane_b32 s50, v22, 19
2181; GFX12-NEXT: v_readlane_b32 s49, v22, 18
2182; GFX12-NEXT: v_readlane_b32 s48, v22, 17
2183; GFX12-NEXT: v_readlane_b32 s47, v22, 16
2184; GFX12-NEXT: v_readlane_b32 s46, v22, 15
2185; GFX12-NEXT: v_readlane_b32 s45, v22, 14
2186; GFX12-NEXT: v_readlane_b32 s44, v22, 13
2187; GFX12-NEXT: v_readlane_b32 s43, v22, 12
2188; GFX12-NEXT: v_readlane_b32 s42, v22, 11
2189; GFX12-NEXT: v_readlane_b32 s41, v22, 10
2190; GFX12-NEXT: v_readlane_b32 s40, v22, 9
2191; GFX12-NEXT: v_readlane_b32 s39, v22, 8
2192; GFX12-NEXT: v_readlane_b32 s38, v22, 7
2193; GFX12-NEXT: v_readlane_b32 s37, v22, 6
2194; GFX12-NEXT: v_readlane_b32 s36, v22, 5
2195; GFX12-NEXT: v_readlane_b32 s35, v22, 4
2196; GFX12-NEXT: v_readlane_b32 s34, v22, 3
2197; GFX12-NEXT: v_readlane_b32 s33, v22, 2
2198; GFX12-NEXT: v_readlane_b32 s31, v22, 1
2199; GFX12-NEXT: v_readlane_b32 s30, v22, 0
Matt Arsenaultef676642024-07-31 23:10:15 +04002200; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
Matt Arsenault1bf385f2024-11-08 19:01:59 -08002201; GFX12-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload
Carl Ritson86627142024-09-04 12:15:20 +09002202; GFX12-NEXT: s_wait_alu 0xfffe
Matt Arsenaultef676642024-07-31 23:10:15 +04002203; GFX12-NEXT: s_mov_b32 exec_lo, s0
2204; GFX12-NEXT: s_wait_loadcnt 0x0
Matt Arsenaultef676642024-07-31 23:10:15 +04002205; GFX12-NEXT: s_setpc_b64 s[30:31]
2206 %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
2207 %alloca1 = alloca [4096 x i32], align 4, addrspace(5)
2208 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
2209
2210 ; Force no SGPRs to be available for the carry-out of the vector add.
2211 %asm = call %asm.output3 asm sideeffect
2212 "; def $0, $1, $2, $3, $4, $5, $6, $7",
2213 "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={v[0:15]},={v[16:21]},={vcc}"()
2214
2215 %s0 = extractvalue %asm.output3 %asm, 0
2216 %s1 = extractvalue %asm.output3 %asm, 1
2217 %s2 = extractvalue %asm.output3 %asm, 2
2218 %s3 = extractvalue %asm.output3 %asm, 3
2219 %s4 = extractvalue %asm.output3 %asm, 4
2220
2221 %v0 = extractvalue %asm.output3 %asm, 5
2222 %v1 = extractvalue %asm.output3 %asm, 6
2223
2224 %vcc = extractvalue %asm.output3 %asm, 7
2225
2226 %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 128
2227
2228 ; scc is unavailable since it is live into the inline asm use below.
2229 call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9",
2230 "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s59},{scc}"(
2231 <16 x i32> %s0,
2232 <16 x i32> %s1,
2233 <16 x i32> %s2,
2234 <8 x i32> %s3,
2235 <2 x i32> %s4,
2236 <16 x i32> %v0,
2237 <6 x i32> %v1,
2238 i64 %vcc,
2239 ptr addrspace(5) %alloca1.offset,
2240 i32 0) ; use of scc
2241
2242 ret void
2243}
2244
2245; For gfx8/gfx9, this should enforce a budget of 24 VGPRs, and 60 SGPRs (4
2246; are reserved at the end for xnack + vcc).
;
; Attribute group #0: nounwind, with a requested stack alignment of 64
; (alignstack=64) while "no-realign-stack" is also set. The three
; "amdgpu-no-workitem-id-*" attributes are present, and
; "amdgpu-waves-per-eu" is given the same value (10) for both of its bounds.
2247attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" }
; Attribute group #1: identical to #0 except that it requests alignstack=16
; instead of alignstack=64.
2248attributes #1 = { nounwind alignstack=16 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" }