| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn -mcpu=verde -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX6 %s |
| ; RUN: llc -sgpr-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn -mcpu=tonga -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck --check-prefix=CHECK %s |
| ; RUN: llc -mtriple=amdgcn -mattr=-xnack,+enable-flat-scratch -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX9-FLATSCR,FLATSCR %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=CHECK,GFX10-FLATSCR,FLATSCR %s |
| ; |
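| ; There is something about Tonga that causes this test to spend a lot of time |
| ; in the default register allocator, so the Tonga RUN line above selects the |
| ; basic allocator (-sgpr-regalloc=basic -vgpr-regalloc=basic) instead. |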
| |
| |
| ; When the offset of VGPR spills into scratch space gets too large, an additional SGPR |
| ; is used to calculate the scratch load/store address. Make sure that this |
| ; mechanism works even when many spills happen. |
| |
| ; Just test that it compiles successfully. |
| define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { |
| ; GFX6-LABEL: test: |
| ; GFX6: ; %bb.0: ; %entry |
| ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 |
| ; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, -1, v0 |
| ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 13, v0 |
| ; GFX6-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-NEXT: v_mov_b32_e32 v6, 0 |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: v_add_i32_e32 v7, vcc, s2, v5 |
| ; GFX6-NEXT: v_mov_b32_e32 v0, s3 |
| ; GFX6-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v0, vcc |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 |
| ; GFX6-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 |
| ; GFX6-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 |
| ; GFX6-NEXT: s_mov_b32 s42, -1 |
| ; GFX6-NEXT: s_mov_b32 s43, 0xe8f000 |
| ; GFX6-NEXT: s_add_u32 s40, s40, s11 |
| ; GFX6-NEXT: s_addc_u32 s41, s41, 0 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x3fd00 |
| ; GFX6-NEXT: s_mov_b64 s[8:9], 0x100 |
| ; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[12:13], 0x180 |
| ; GFX6-NEXT: s_mov_b64 s[14:15], s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[16:17], 0x200 |
| ; GFX6-NEXT: s_mov_b64 s[18:19], s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[20:21], 0x280 |
| ; GFX6-NEXT: s_mov_b64 s[22:23], s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[24:25], 0x300 |
| ; GFX6-NEXT: s_mov_b64 s[26:27], s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[28:29], 0x380 |
| ; GFX6-NEXT: s_mov_b64 s[30:31], s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[36:37], 0x400 |
| ; GFX6-NEXT: s_mov_b64 s[38:39], s[6:7] |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4f900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:16 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:16 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:20 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:24 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:28 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:32 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:32 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:36 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:40 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:44 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:48 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:48 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:52 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:56 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:60 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:64 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:64 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:68 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:72 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:76 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:80 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:80 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:84 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:88 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:92 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:96 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:96 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:100 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:104 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:108 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:112 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:112 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:116 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:120 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:124 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:128 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:128 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:132 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:136 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:140 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:144 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:144 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:148 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:152 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:156 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:160 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:160 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:164 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:168 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:172 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:176 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:176 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:180 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:184 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:188 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:192 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:192 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:196 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:200 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:204 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:208 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:208 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:212 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:216 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:220 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:224 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:224 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:228 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:232 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:236 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:240 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:240 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:244 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:248 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:252 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:256 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:256 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:260 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:264 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:268 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:272 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:272 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:276 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:280 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:284 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:288 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:288 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:292 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:296 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:300 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:304 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:304 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:308 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:312 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:316 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:320 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:320 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:324 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:328 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:332 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:336 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:336 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:340 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:344 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:348 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:352 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:352 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:356 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:360 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:364 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:368 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:368 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:372 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:376 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:380 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:384 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:384 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:388 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:392 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:396 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:400 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:400 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:404 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:408 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:412 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:416 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:416 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:420 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:424 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:428 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:432 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:432 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:436 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:440 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:444 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:448 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:448 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:452 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:456 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:460 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:464 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:464 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:468 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:472 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:476 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:480 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:480 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:484 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:488 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:492 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:496 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:496 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:500 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:504 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:508 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:512 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:512 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:516 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:520 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:524 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:528 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:528 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:532 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:536 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:540 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:544 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:544 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:548 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:552 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:556 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:560 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:560 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:564 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:568 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:572 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:576 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:576 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:580 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:584 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:588 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:592 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:592 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:596 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:600 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:604 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:608 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:608 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:612 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:616 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:620 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:624 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:624 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:628 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:632 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:636 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:640 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:640 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:644 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:648 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:652 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:656 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:656 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:660 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:664 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:668 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:672 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:672 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:676 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:680 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:684 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:688 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:688 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:692 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:696 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:700 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:704 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:704 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:708 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:712 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:716 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:720 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:720 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:724 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:728 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:732 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:736 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:736 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:740 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:744 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:748 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:752 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:752 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:756 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:760 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:764 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:768 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:768 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:772 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:776 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:780 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:784 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:784 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:788 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:792 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:796 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:800 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:800 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:804 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:808 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:812 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:816 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:816 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:820 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:824 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:828 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:832 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:832 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:836 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:840 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:844 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:848 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:848 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:852 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:856 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:860 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:864 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:864 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:868 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:872 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:876 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:880 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:880 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:884 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:888 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:892 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:896 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:896 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:900 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:904 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:908 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:912 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:912 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:916 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:920 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:924 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:928 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:928 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:932 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:936 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:940 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:944 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:944 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:948 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:952 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:956 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:960 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:960 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:964 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:968 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:972 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:976 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:976 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:980 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:984 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:988 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:992 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:992 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:996 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1000 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1004 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1008 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1008 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1012 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1016 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1020 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1024 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1024 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1028 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1032 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1036 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1040 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1040 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1044 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1048 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1052 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1056 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1056 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1060 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1064 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1068 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1072 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1072 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1076 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1080 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1084 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1088 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1088 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1092 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1096 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1100 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1104 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1104 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1108 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1112 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1116 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1120 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1120 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1124 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1128 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1132 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1136 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1136 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1140 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1144 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1148 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1152 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1152 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1156 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1160 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1164 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1168 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1168 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1172 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1176 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1180 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1184 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1184 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1188 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1192 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1196 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1200 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1204 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1208 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1212 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1216 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1216 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1220 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1224 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1228 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1232 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1232 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1236 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1240 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1244 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1248 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1248 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1252 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1256 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1260 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1264 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1264 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1268 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1272 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1276 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1280 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1280 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1284 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1288 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1292 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1296 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1296 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1300 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1304 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1308 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1312 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1312 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1316 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1320 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1324 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1328 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1328 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1332 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1336 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1340 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1344 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1344 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1348 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1352 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1356 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1360 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1360 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1364 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1368 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1372 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1376 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1376 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1380 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1384 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1388 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1392 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1392 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1396 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1400 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1404 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1408 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1408 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1412 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1416 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1420 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1424 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1424 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1428 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1432 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1436 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1440 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1440 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1444 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1448 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1452 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1456 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1456 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1460 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1464 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1468 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1472 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1472 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1476 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1480 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1484 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1488 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1488 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1492 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1496 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1500 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1504 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1504 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1508 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1512 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1516 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1520 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1520 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1524 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1528 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1532 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1536 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1536 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1540 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1544 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1548 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1552 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1552 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1556 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1560 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1564 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1568 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1568 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1572 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1576 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1580 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1584 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1584 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1588 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1592 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1596 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1600 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1604 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1608 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1612 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1616 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1616 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1620 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1624 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1628 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1632 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1632 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1636 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1640 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1644 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1648 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1648 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1652 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1656 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1660 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1664 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1664 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1668 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1672 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1676 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1680 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1680 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1684 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1688 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1692 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1696 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1696 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1700 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1704 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1708 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1712 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1712 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1716 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1720 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1724 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1728 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1728 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1732 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1736 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1740 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1744 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1744 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1748 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1752 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1756 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1760 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1760 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1764 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1768 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1772 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1776 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1776 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1780 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1784 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1788 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1792 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1792 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1796 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1800 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1804 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1808 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1808 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1812 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1816 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1820 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1824 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1824 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1828 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1832 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1836 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1840 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1840 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1844 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1848 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1852 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1856 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1856 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1860 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1864 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1868 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1872 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1872 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1876 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1880 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1884 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1888 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1888 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1892 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1896 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1900 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1904 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1904 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1908 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1912 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1916 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1920 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1920 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1924 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1928 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1932 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1936 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1936 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1940 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1944 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1948 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1952 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1952 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1956 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1960 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1964 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1968 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1968 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1972 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1976 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1980 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1984 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1984 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1988 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1992 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1996 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2000 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2000 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2004 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2008 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2012 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2016 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2016 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2020 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2024 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2028 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2032 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2032 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2036 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2040 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2044 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2048 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2048 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2052 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2056 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2060 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2064 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2064 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2068 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2072 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2076 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2080 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2080 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2084 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2088 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2092 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2096 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2096 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2100 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2104 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2108 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2112 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2112 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2116 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2120 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2124 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2128 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2128 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2132 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2136 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2140 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2144 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2144 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2148 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2152 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2156 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2160 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2160 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2164 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2168 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2172 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2176 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2176 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2180 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2184 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2188 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2192 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2192 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2196 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2200 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2204 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2208 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2208 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2212 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2216 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2220 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2224 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2224 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2228 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2232 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2236 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2240 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2240 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2244 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2248 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2252 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2256 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2256 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2260 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2264 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2268 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2272 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2272 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2276 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2280 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2284 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2288 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2288 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2292 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2296 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2300 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2304 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2304 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2308 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2312 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2316 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2320 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2320 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2324 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2328 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2332 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2336 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2336 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2340 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2344 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2348 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2352 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2352 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2356 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2360 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2364 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2368 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2368 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2372 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2376 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2380 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2384 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2384 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2388 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2392 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2396 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2400 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2400 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2404 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2408 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2412 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2416 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2416 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2420 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2424 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2428 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2432 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2432 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2436 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2440 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2444 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2448 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2448 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2452 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2456 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2460 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2464 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2464 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2468 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2472 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2476 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2480 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2480 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2484 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2488 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2492 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2496 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2496 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2500 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2504 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2508 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2512 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2512 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2516 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2520 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2524 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2528 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2528 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2532 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2536 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2540 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2544 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2544 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2548 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2552 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2556 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2560 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2560 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2564 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2568 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2572 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2576 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2576 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2580 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2584 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2588 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2592 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2592 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2596 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2600 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2604 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2608 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2608 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2612 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2616 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2620 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2624 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2624 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2628 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2632 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2636 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2640 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2640 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2644 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2648 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2652 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2656 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2656 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2660 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2664 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2668 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2672 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2672 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2676 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2680 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2684 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2688 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2688 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2692 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2696 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2700 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2704 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2704 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2708 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2712 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2716 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2720 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2720 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2724 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2728 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2732 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2736 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2736 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2740 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2744 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2748 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2752 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2752 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2756 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2760 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2764 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2768 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2768 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2772 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2776 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2780 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2784 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2784 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2788 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2792 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2796 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2800 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2800 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2804 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2808 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2812 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2816 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2816 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2820 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2824 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2828 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2832 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2832 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2836 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2840 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2844 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2848 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2848 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2852 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2856 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2860 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2864 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2864 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2868 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2872 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2876 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2880 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2880 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2884 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2888 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2892 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2896 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2896 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2900 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2904 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2908 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2912 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2912 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2916 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2920 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2924 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2928 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2928 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2932 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2936 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2940 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2944 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2944 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2948 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2952 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2956 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2960 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2960 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2964 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2968 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2972 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2976 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2976 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2980 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2984 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2988 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2992 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2992 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2996 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3000 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3004 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3008 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3008 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3012 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3016 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3020 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3024 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3024 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3028 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3032 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3036 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3040 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3040 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3044 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3048 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3052 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3056 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3056 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3060 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3064 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3068 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3072 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3072 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3076 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3080 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3084 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3088 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3088 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3092 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3096 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3100 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3104 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3104 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3108 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3112 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3116 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3120 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3120 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3124 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3128 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3132 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3136 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3136 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3140 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3144 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3148 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3152 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3152 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3156 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3160 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3164 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3168 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3168 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3172 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3176 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3180 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3184 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3184 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3188 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3192 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3196 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3200 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3204 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3208 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3212 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3216 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3216 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3220 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3224 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3228 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3232 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3232 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3236 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3240 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3244 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3248 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3248 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3252 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3256 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3260 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3264 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3264 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3268 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3272 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3276 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3280 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3280 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3284 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3288 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3292 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3296 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3296 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3300 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3304 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3308 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3312 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3312 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3316 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3320 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3324 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3328 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3328 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3332 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3336 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3340 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3344 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3344 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3348 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3352 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3356 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3360 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3360 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3364 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3368 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3372 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3376 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3376 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3380 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3384 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3388 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3392 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3392 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3396 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3400 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3404 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3408 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3408 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3412 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3416 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3420 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3424 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3424 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3428 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3432 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3436 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3440 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3440 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3444 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3448 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3452 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3456 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3456 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3460 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3464 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3468 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3472 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3472 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3476 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3480 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3484 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3488 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3488 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3492 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3496 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3500 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3504 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3504 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3508 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3512 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3516 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3520 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3520 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3524 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3528 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3532 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3536 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3536 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3540 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3544 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3548 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3552 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3552 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3556 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3560 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3564 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3568 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3568 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3572 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3576 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3580 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3584 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3584 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3588 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3592 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3596 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3600 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3604 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3608 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3612 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3616 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3616 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3620 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3624 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3628 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3632 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3632 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3636 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3640 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3644 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3648 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3648 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3652 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3656 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3660 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3664 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3664 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3668 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3672 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3676 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3680 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3680 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3684 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3688 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3692 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3696 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3696 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3700 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3704 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3708 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3712 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3712 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3716 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3720 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3724 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3728 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3728 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3732 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3736 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3740 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3744 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3744 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3748 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3752 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3756 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3760 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3760 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3764 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3768 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3772 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3776 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3776 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3780 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3784 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3788 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3792 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3792 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3796 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3800 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3804 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3808 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3808 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3812 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3816 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3820 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3824 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3824 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3828 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3832 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3836 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3840 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3840 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3844 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3848 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3852 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3856 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3856 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3860 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3864 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3868 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3872 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3872 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3876 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3880 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3884 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3888 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3888 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3892 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3896 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3900 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3904 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3904 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3908 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3912 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3916 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3920 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3920 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3924 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3928 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3932 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3936 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3936 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3940 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3944 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3948 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3952 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3952 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3956 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3960 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3964 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3968 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3972 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3976 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3980 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3984 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3988 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3992 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3996 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4000 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4004 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4008 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4012 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4016 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4020 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4024 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4028 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4032 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4036 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4040 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4044 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4048 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4052 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4056 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4060 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4064 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4068 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4072 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4076 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4080 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b64 s[4:5], 0x80 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x40100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x40500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x40900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x40d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x41100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x41500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x41900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x41d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x42100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x42500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x42900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x42d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x43100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x43500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x43900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x43d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x44100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x44500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x44900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x44d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x45100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x45500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x45900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x45d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x46100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x46500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x46900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x46d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x47100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x47500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x47900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x47d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x48100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x48500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x48900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x48d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x49100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x49500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x49900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x49d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4a100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4a500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4a900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4ad00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4b100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4b500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4b900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4bd00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4c100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4c500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4c900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4cd00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4d100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4d500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4d900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4dd00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4e100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4e500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4e900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4ed00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4f100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4f500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x4f900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4080 |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: v_add_i32_e32 v7, vcc, s0, v5 |
| ; GFX6-NEXT: v_mov_b32_e32 v4, s1 |
| ; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4f500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4f100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4ed00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4e900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4e500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4e100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4dd00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4d900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4d500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4d100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4cd00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4c900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4c500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s33, 0x4c100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4bd00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4b900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4b500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4b100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4ad00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4a900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4a500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s28, 0x4a100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s24, 0x49d00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s24, 0x49900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s24, 0x49500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s24, 0x49100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s24, 0x48d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s24, 0x48900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s24, 0x48500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s24, 0x48100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s20, 0x47d00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s20, 0x47900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s20, 0x47500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s20, 0x47100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s20, 0x46d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s20, 0x46900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s20, 0x46500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s20, 0x46100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s16, 0x45d00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s16, 0x45900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s16, 0x45500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s16, 0x45100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s16, 0x44d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s16, 0x44900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s16, 0x44500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s16, 0x44100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s12, 0x43d00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s12, 0x43900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s12, 0x43500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s12, 0x43100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s12, 0x42d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s12, 0x42900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s12, 0x42500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s12, 0x42100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s8, 0x41d00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s8, 0x41900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s8, 0x41500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s8, 0x41100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s8, 0x40d00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s8, 0x40900 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s8, 0x40500 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s8, 0x40100 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_mov_b32 s4, 0x3fd00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4068 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4072 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4076 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4080 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4052 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4056 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4060 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4064 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4036 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4040 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4044 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4048 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4020 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4024 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4028 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4032 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4004 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4008 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4012 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4016 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3988 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3992 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3996 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4000 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3972 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3976 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3980 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3984 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3968 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3956 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3960 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3964 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3968 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3952 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3940 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3944 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3948 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3952 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3936 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3924 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3928 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3932 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3936 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3920 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3908 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3912 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3916 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3920 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3904 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3892 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3896 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3900 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3904 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3888 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3876 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3880 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3884 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3888 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3872 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3860 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3864 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3868 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3872 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3856 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3844 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3848 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3852 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3856 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3840 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3828 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3832 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3836 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3840 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3824 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3812 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3816 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3820 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3824 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3808 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3796 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3800 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3804 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3808 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3792 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3780 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3784 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3788 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3792 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3776 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3764 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3768 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3772 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3776 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3760 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3748 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3752 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3756 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3760 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3744 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3732 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3736 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3740 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3744 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3728 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3716 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3720 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3724 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3728 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3712 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3700 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3704 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3708 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3712 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3696 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3684 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3688 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3692 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3696 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3680 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3668 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3672 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3676 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3680 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3664 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3652 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3656 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3660 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3664 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3648 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3636 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3640 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3644 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3648 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3632 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3620 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3624 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3628 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3632 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3616 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3604 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3608 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3612 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3616 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3600 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3588 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3592 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3596 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3600 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3584 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3572 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3576 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3580 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3584 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3568 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3556 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3560 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3564 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3568 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3552 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3540 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3544 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3548 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3552 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3536 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3524 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3528 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3532 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3536 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3520 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3508 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3512 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3516 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3520 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3504 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3492 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3496 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3500 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3504 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3488 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3476 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3480 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3484 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3488 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3472 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3460 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3464 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3468 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3472 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3456 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3444 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3448 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3452 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3456 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3440 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3428 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3432 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3436 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3440 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3424 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3412 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3416 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3420 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3424 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3408 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3396 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3400 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3404 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3408 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3392 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3380 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3384 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3388 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3392 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3376 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3364 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3368 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3372 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3376 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3360 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3348 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3352 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3356 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3360 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3344 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3332 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3336 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3340 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3344 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3328 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3316 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3320 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3324 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3328 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3312 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3300 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3304 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3308 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3312 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3296 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3284 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3288 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3292 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3296 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3280 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3268 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3272 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3276 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3280 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3264 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3252 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3256 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3260 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3264 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3248 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3236 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3240 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3244 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3248 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3232 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3220 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3224 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3228 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3232 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3216 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3204 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3208 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3212 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3216 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3200 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3188 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3192 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3196 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3200 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3184 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3172 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3176 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3180 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3184 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3168 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3156 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3160 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3164 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3168 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3152 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3140 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3144 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3148 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3152 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3136 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3124 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3128 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3132 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3136 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3120 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3108 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3112 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3116 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3120 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3104 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3092 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3096 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3100 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3104 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3088 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3076 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3080 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3084 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3088 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3072 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3060 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3064 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3068 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3072 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3056 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3044 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3048 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3052 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3056 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3040 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3028 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3032 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3036 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3040 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3024 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3012 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3016 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3020 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3024 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3008 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2996 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3000 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3004 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3008 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2992 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2980 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2984 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2988 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2992 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2976 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2964 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2968 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2972 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2976 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2960 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2948 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2952 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2956 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2960 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2944 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2932 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2936 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2940 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2944 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2928 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2916 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2920 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2924 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2928 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2912 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2900 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2904 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2908 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2912 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2896 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2884 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2888 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2892 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2896 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2880 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2868 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2872 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2876 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2880 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2864 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2852 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2856 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2860 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2864 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2848 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2836 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2840 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2844 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2848 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2832 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2820 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2824 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2828 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2832 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2816 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2804 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2808 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2812 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2816 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2800 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2788 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2792 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2796 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2800 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2784 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2772 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2776 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2780 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2784 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2768 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2756 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2760 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2764 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2768 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2752 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2740 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2744 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2748 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2752 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2736 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2724 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2728 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2732 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2736 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2720 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2708 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2712 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2716 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2720 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2704 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2692 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2696 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2700 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2704 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2688 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2676 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2680 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2684 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2688 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2672 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2660 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2664 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2668 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2672 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2656 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2644 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2648 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2652 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2656 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2640 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2628 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2632 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2636 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2640 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2624 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2612 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2616 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2620 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2624 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2608 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2596 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2600 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2604 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2608 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2592 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2580 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2584 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2588 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2592 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2576 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2564 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2568 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2572 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2576 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2560 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2548 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2552 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2556 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2560 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2544 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2532 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2536 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2540 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2544 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2528 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2516 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2520 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2524 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2528 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2512 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2500 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2504 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2508 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2512 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2496 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2484 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2488 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2492 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2496 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2480 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2468 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2472 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2476 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2480 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2464 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2452 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2456 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2460 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2464 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2448 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2436 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2440 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2444 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2448 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2432 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2420 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2424 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2428 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2432 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2416 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2404 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2408 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2412 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2416 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2400 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2388 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2392 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2396 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2400 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2384 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2372 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2376 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2380 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2384 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2368 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2356 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2360 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2364 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2368 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2352 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2340 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2344 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2348 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2352 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2336 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2324 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2328 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2332 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2336 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2320 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2308 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2312 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2316 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2320 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2304 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2292 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2296 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2300 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2304 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2288 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2276 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2280 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2284 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2288 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2272 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2260 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2264 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2268 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2272 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2256 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2244 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2248 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2252 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2256 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2240 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2228 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2232 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2236 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2240 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2224 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2212 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2216 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2220 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2224 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2208 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2196 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2200 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2204 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2208 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2192 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2180 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2184 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2188 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2192 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2176 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2164 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2168 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2172 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2176 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2160 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2148 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2152 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2156 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2160 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2144 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2132 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2136 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2140 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2144 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2128 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2116 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2120 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2124 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2128 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2112 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2100 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2104 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2108 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2112 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2096 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2084 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2088 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2092 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2096 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2080 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2068 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2072 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2076 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2080 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2064 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2052 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2056 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2060 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2064 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2048 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2036 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2040 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2044 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2048 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2032 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2020 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2024 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2028 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2032 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2016 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2004 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2008 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2012 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2016 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2000 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1988 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1992 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1996 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2000 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1984 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1972 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1976 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1980 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1984 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1968 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1956 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1960 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1964 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1968 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1952 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1940 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1944 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1948 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1952 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1936 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1924 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1928 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1932 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1936 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1920 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1908 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1912 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1916 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1920 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1904 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1892 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1896 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1900 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1904 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1888 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1876 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1880 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1884 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1888 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1872 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1860 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1864 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1868 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1872 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1856 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1844 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1848 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1852 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1856 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1840 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1828 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1832 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1836 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1840 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1824 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1812 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1816 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1820 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1824 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1808 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1796 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1800 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1804 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1808 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1792 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1780 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1784 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1788 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1792 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1776 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1764 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1768 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1772 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1776 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1760 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1748 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1752 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1756 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1760 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1744 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1732 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1736 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1740 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1744 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1728 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1716 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1720 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1724 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1728 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1712 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1700 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1704 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1708 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1712 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1696 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1684 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1688 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1692 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1696 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1680 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1668 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1672 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1676 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1680 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1664 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1652 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1656 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1660 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1664 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1648 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1636 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1640 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1644 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1648 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1632 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1620 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1624 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1628 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1632 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1616 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1604 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1608 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1612 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1616 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1600 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1588 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1592 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1596 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1600 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1584 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1572 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1576 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1580 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1584 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1568 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1556 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1560 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1564 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1568 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1552 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1540 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1544 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1548 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1552 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1536 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1524 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1528 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1532 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1536 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1520 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1508 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1512 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1516 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1520 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1504 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1492 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1496 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1500 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1504 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1488 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1476 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1480 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1484 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1488 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1472 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1460 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1464 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1468 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1472 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1456 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1444 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1448 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1452 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1456 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1440 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1428 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1432 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1436 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1440 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1424 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1412 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1416 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1420 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1424 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1408 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1396 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1400 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1404 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1408 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1392 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1380 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1384 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1388 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1392 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1376 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1364 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1368 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1372 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1376 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1360 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1348 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1352 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1356 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1360 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1344 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1332 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1336 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1340 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1344 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1328 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1316 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1320 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1324 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1328 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1312 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1300 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1304 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1308 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1312 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1296 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1284 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1288 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1292 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1296 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1280 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1268 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1272 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1276 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1280 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1264 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1252 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1256 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1260 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1264 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1248 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1236 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1240 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1244 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1248 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1232 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1220 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1224 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1228 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1232 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1216 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1204 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1208 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1212 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1216 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1200 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1188 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1192 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1196 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1200 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1184 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1172 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1176 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1180 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1184 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1168 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1156 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1160 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1164 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1168 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1152 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1140 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1144 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1148 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1152 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1136 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1124 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1128 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1132 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1136 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1120 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1108 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1112 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1116 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1120 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1104 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1092 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1096 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1100 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1104 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1088 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1076 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1080 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1084 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1088 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1072 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1060 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1064 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1068 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1072 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1056 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1044 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1048 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1052 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1056 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1040 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1028 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1032 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1036 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1040 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1024 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1012 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1016 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1020 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1024 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1008 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:996 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1000 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1004 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1008 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:992 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:980 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:984 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:988 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:992 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:976 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:964 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:968 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:972 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:976 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:960 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:948 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:952 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:956 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:960 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:944 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:932 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:936 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:940 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:944 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:928 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:916 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:920 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:924 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:928 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:912 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:900 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:904 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:908 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:912 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:896 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:884 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:888 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:892 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:896 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:880 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:868 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:872 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:876 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:880 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:864 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:852 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:856 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:860 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:864 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:848 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:836 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:840 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:844 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:848 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:832 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:820 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:824 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:828 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:832 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:816 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:804 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:808 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:812 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:816 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:800 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:788 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:792 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:796 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:800 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:784 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:772 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:776 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:780 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:784 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:768 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:756 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:760 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:764 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:768 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:752 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:740 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:744 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:748 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:752 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:736 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:724 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:728 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:732 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:736 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:720 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:708 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:712 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:716 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:720 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:704 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:692 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:696 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:700 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:704 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:688 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:676 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:680 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:684 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:688 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:672 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:660 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:664 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:668 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:672 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:656 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:644 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:648 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:652 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:656 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:640 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:628 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:632 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:636 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:640 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:624 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:612 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:616 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:620 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:624 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:608 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:596 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:600 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:604 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:608 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:592 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:580 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:584 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:588 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:592 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:576 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:564 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:568 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:572 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:576 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:560 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:548 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:552 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:556 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:560 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:544 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:532 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:536 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:540 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:544 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:528 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:516 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:520 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:524 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:528 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:512 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:500 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:504 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:508 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:512 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:496 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:484 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:488 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:492 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:496 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:480 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:468 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:472 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:476 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:480 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:464 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:452 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:456 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:460 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:464 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:448 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:436 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:440 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:444 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:448 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:432 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:420 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:424 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:428 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:432 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:416 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:404 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:408 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:412 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:416 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:400 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:388 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:392 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:396 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:400 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:384 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:372 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:376 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:380 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:384 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:368 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:356 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:360 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:364 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:368 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:352 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:340 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:344 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:348 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:352 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:336 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:324 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:328 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:332 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:336 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:320 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:308 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:312 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:316 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:320 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:304 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:292 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:296 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:300 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:304 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:288 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:276 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:280 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:284 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:288 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:272 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:260 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:264 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:268 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:272 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:256 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:244 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:248 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:252 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:256 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:240 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:228 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:232 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:236 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:240 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:224 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:212 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:216 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:220 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:224 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:208 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:196 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:200 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:204 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:208 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:192 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:180 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:184 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:188 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:192 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:176 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:164 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:168 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:172 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:176 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:160 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:148 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:152 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:156 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:160 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:144 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:132 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:136 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:140 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:144 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:128 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:116 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:120 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:124 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:128 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:112 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:100 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:104 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:108 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:112 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:96 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:84 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:88 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:92 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:96 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:80 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:68 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:72 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:76 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:80 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:64 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:52 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:56 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:60 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:64 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:48 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:36 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:40 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:44 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:48 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:32 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:20 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:24 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:28 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:32 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:16 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:16 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 |
| ; GFX6-NEXT: s_endpgm |
| ; |
| ; GFX9-FLATSCR-LABEL: test: |
| ; GFX9-FLATSCR: ; %bb.0: ; %entry |
| ; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 |
| ; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 |
| ; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0 |
| ; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, s2, v5 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s3 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x80, v2 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 20 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 36 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 52 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v2 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x74 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x180, v2 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x104 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x114 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x124 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x134 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x144 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x154 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x164 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x200, v2 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x174 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x184 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x194 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x280, v2 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1f4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x204 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x214 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x224 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x234 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x244 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x254 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x264 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x300, v2 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x274 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x284 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x294 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x380, v2 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2f4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x304 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x314 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x324 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x334 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x344 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x354 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x364 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x400, v2 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x374 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x384 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x394 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x404 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, s1 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x414 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x424 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x434 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x444 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x454 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x464 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x474 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x484 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x494 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x504 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x514 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x524 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x534 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x544 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x554 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x564 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x574 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:384 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x584 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x594 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x604 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x614 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x624 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x634 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x644 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x654 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x664 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x674 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x684 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x694 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x704 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x714 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x724 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x734 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x744 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x754 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x764 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x774 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x784 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x794 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:976 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x804 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x814 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x824 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x834 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x844 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x854 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x864 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x874 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x884 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x894 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x904 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x914 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x924 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x934 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x944 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x954 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x964 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x974 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x984 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x994 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xaa4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xab4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xac4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xad4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xae4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xaf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xba4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbe4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xca4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xce4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xda4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2496 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xde4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xea4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xeb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xec4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xed4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xee4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xef4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfa4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfe4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xff4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1004 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1014 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1024 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1034 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1044 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1054 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1064 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1074 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1084 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1094 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1104 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1114 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1124 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1134 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1144 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1154 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1164 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1174 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1184 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1194 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1204 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1214 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1224 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1234 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1244 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1254 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1264 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1274 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1284 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1294 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1304 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1314 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1324 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3888 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1334 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1344 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1354 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1364 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1374 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1384 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1394 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4 |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v4, vcc, s0, v5 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1394 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1384 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1374 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1364 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3952 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1354 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3936 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1344 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3920 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1334 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3904 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1324 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3888 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1314 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3872 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1304 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3856 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3840 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3824 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3808 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3792 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3776 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3760 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1294 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3744 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1284 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3728 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1274 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3712 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1264 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3696 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1254 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3680 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1244 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3664 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1234 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3648 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1224 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3632 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1214 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3616 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1204 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3600 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3584 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3568 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3552 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3536 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3520 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3504 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1194 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3488 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1184 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3472 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1174 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3456 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1164 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3440 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1154 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3424 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1144 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3408 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1134 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3392 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1124 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3376 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1114 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3360 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1104 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3344 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3328 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3312 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3296 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3280 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3264 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3248 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1094 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3232 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1084 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3216 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1074 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3200 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1064 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3184 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1054 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3168 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1044 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3152 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1034 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3136 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1024 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3120 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1014 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3104 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1004 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3088 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xff4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3072 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfe4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3056 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3040 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3024 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3008 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfa4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2992 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2976 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2960 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2944 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2928 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2912 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2896 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2880 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2864 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2848 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2832 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xef4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2816 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xee4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2800 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xed4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2784 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xec4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2768 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xeb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2752 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xea4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2736 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2720 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2704 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2688 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2672 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2656 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2640 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2624 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2608 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2592 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2576 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2560 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xde4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2544 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2528 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2512 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2496 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xda4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2480 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2464 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2448 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2432 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2416 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2400 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2384 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2368 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2352 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2336 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2320 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2304 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xce4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2288 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2272 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2256 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2240 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xca4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2224 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2208 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2192 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2176 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2160 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2144 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2128 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2112 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2096 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbe4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xba4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xaf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xae4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xad4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xac4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xab4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xaa4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa34 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa24 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa14 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa04 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x994 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x984 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x974 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x964 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x954 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x944 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x934 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x924 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x914 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x904 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x894 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x884 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x874 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x864 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x854 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x844 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x834 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x824 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x814 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x804 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:992 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:976 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:960 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:944 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x794 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:928 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x784 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:912 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x774 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:896 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x764 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:880 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x754 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:864 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x744 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:848 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x734 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:832 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x724 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:816 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x714 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:800 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x704 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:784 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:768 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:752 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:736 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:720 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:704 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:688 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x694 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:672 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x684 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:656 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x674 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:640 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x664 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:624 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x654 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:608 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x644 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:592 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x634 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:576 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x624 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:560 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x614 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:544 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x604 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:528 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:512 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:496 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:480 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:464 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:448 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:432 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x594 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:416 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x584 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:400 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x574 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:384 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x564 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:368 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x554 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:352 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x544 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:336 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x534 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:320 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x524 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:304 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x514 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:288 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x504 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:272 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:256 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:240 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:224 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:208 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:192 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:176 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x494 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:160 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x484 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:144 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x474 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:128 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x464 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:112 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x454 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:96 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x444 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:80 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x434 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:64 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x424 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:48 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x414 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:32 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x404 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:16 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3f4 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x400, v4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x394 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x384 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x374 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x380, v4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x364 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x354 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x344 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x334 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x324 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x314 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x304 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x300, v4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x294 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x284 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x274 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x280, v4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x264 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x254 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x244 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x234 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x224 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x214 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x204 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1f4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x200, v4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1e4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1d4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1c4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1b4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1a4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x194 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x184 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x174 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x180, v4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x164 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x154 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x144 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x134 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x124 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x114 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x104 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xf4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v4 |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xe4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xd4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xc4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xb4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xa4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x94 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x84 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x74 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x80, v4 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x64 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4080 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x54 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4064 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x44 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4048 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 52 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4032 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 36 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4016 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 20 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4000 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 4 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:3984 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:3968 |
| ; GFX9-FLATSCR-NEXT: s_endpgm |
| ; |
| ; GFX10-FLATSCR-LABEL: test: |
| ; GFX10-FLATSCR: ; %bb.0: ; %entry |
| ; GFX10-FLATSCR-NEXT: s_add_u32 s8, s8, s13 |
| ; GFX10-FLATSCR-NEXT: s_addc_u32 s9, s9, 0 |
| ; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 |
| ; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 |
| ; GFX10-FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; GFX10-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 |
| ; GFX10-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 |
| ; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, s4, s2, v5 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e64 v22, null, s3, 0, s4 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x804 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x80, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x100, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:20 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:36 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:52 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:68 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:84 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:100 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:116 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:132 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:148 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:164 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:180 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:196 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:212 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:228 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:244 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v6, vcc_lo, 0x180, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:260 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:276 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:292 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:308 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:324 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:340 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:356 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:372 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v8, vcc_lo, 0x200, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:388 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:404 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:420 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:436 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:452 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:468 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:484 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:500 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v10, vcc_lo, 0x280, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:516 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:532 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:548 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:564 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:580 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:596 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:612 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:628 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v12, vcc_lo, 0x300, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:644 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:660 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:676 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:692 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:708 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:724 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:740 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:756 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v14, vcc_lo, 0x380, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v15, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:772 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:788 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:804 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:820 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:836 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:852 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:868 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:884 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v16, vcc_lo, 0x400, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:900 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:916 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:932 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:948 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:964 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:980 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:996 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:1012 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v18, vcc_lo, 0x480, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x500, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1028 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1044 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1060 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1076 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1092 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1108 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1124 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1140 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1156 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1172 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1188 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1204 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1220 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1236 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1252 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x580, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1268 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1284 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1300 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1316 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1332 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1348 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1364 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1380 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x600, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1396 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1412 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1428 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1444 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1460 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1476 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1492 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1508 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x680, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1524 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1540 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1556 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1572 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1588 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1604 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1620 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1636 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x700, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1652 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1668 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1684 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1700 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1716 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1732 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1748 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1764 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x780, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v0 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1780 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1796 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1812 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1828 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1844 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1860 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1876 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1892 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[20:21], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1908 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1924 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1940 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1956 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1972 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1988 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2004 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2020 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v2 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2036 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x814 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x824 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x834 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x844 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x854 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x864 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x874 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v6 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v7, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x884 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x894 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v8 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x904 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x914 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x924 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x934 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x944 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x954 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x964 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x974 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v10 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v11, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x984 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x994 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v12 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v13, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa04 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v14 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v15, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa84 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xaa4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xab4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xac4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xad4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xae4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xaf4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v16 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v17, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb04 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v18 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v19, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb84 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xba4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_clause 0x1 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbe4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbf4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xca4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xce4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcf4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:384 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xda4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xde4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdf4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xea4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xeb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xec4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xed4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xee4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xef4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfa4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:976 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfe4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xff4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1004 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1014 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1024 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1034 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1044 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1054 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1064 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1074 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1084 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1094 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1104 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1114 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1124 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1134 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1144 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1154 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1164 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1174 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1184 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1194 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1204 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1214 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1224 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1234 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1244 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1254 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1264 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1274 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1284 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1294 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1304 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1314 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1324 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1334 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1344 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1354 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1364 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1374 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1384 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1394 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, s2, s0, v5 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e64 v6, null, s1, 0, s2 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4 |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1394 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1384 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1374 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1364 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1354 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1344 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1334 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1324 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1314 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1304 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1294 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1284 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1274 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1264 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1254 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1244 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1234 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1224 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1214 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1204 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1194 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1184 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1174 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1164 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1154 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1144 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1134 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1124 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1114 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1104 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10e4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1094 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1084 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1074 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1064 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1054 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1044 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1034 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1024 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1014 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1004 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xff4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfe4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:992 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:976 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:960 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfa4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:944 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:928 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:912 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:896 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:880 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:864 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:848 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:832 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:816 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:800 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:784 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xef4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:768 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xee4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:752 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xed4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:736 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xec4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:720 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xeb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:704 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xea4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:688 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:672 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:656 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:640 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:624 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:608 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:592 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:576 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:560 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:544 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:528 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdf4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:512 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xde4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:496 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:480 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:464 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:448 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xda4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:432 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:416 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:400 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:384 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:368 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:352 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:336 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:320 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:304 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:288 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:272 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcf4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:256 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xce4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:240 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:224 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:208 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:192 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xca4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:176 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:160 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:144 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:128 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc64 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:112 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:96 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:80 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:64 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:48 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:32 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:16 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbf4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x480, v4 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x780, v0 |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbe4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbd4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbc4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbb4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xba4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x400, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb64 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v7, vcc_lo, 0x780, v2 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v3, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xaf4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v7, vcc_lo, 0x380, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xae4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v9, vcc_lo, 0x780, v7 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xad4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xac4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xab4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xaa4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa94 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa84 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa74 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v9, vcc_lo, 0x300, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa64 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v11, vcc_lo, 0x780, v9 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v10, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa54 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa44 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa34 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa24 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa14 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa04 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v11, vcc_lo, 0x280, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9e4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v13, vcc_lo, 0x780, v11 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v12, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x994 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x984 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x974 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v13, vcc_lo, 0x200, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x964 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v15, vcc_lo, 0x780, v13 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v14, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x954 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x944 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x934 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x924 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x914 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x904 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8f4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v15, vcc_lo, 0x180, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8e4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v17, vcc_lo, 0x780, v15 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v16, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8d4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8c4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8b4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8a4 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x894 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x884 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x874 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v17, vcc_lo, 0x100, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x864 |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v19, vcc_lo, 0x780, v17 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v18, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x854 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x844 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x834 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x824 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x814 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x804 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2036 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v19, vcc_lo, 0x80, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x780, v19 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2020 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2004 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1988 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1972 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1956 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1940 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1924 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1908 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x780, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1892 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1876 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1860 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1844 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1828 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1812 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1796 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1780 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x700, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1764 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1748 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1732 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1716 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1700 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1684 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1668 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1652 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x680, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1636 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1620 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1604 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1588 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1572 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1556 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1540 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1524 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x600, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1508 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1492 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1476 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1460 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1444 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1428 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1412 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1396 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x580, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, vcc_lo, 0x500, v4 |
| ; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v6, vcc_lo |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1380 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1364 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1348 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1332 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1316 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1300 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1284 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1268 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1252 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1236 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1220 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1204 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1188 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1172 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1156 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1140 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1124 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1108 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1092 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1076 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1060 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1044 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1028 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1012 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:996 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:980 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:964 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:948 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:932 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:916 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:900 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:884 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:868 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:852 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:836 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:820 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:804 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:788 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:772 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:756 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:740 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:724 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:708 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:692 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:676 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:660 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:644 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:628 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:612 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:596 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:580 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:564 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:548 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:532 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:516 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:500 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:484 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:468 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:452 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:436 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:420 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:404 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:388 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:372 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:356 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:340 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:324 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:308 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:292 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:276 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:260 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:244 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:228 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:212 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:196 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:180 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:164 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:148 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:132 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:116 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2032 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:100 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2016 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:84 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2000 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:68 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1984 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:52 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1968 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:36 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1952 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:20 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1936 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:4 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1920 |
| ; GFX10-FLATSCR-NEXT: s_endpgm |
| entry: |
| %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) |
| |
| %aptr = getelementptr <1280 x i32>, ptr addrspace(1) %in, i32 %tid |
| %a = load <1280 x i32>, ptr addrspace(1) %aptr |
| |
|   ; Clobber every fourth VGPR (v4, v8, ..., v224) to increase register pressure. |
| call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" () |
| call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" () |
| call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" () |
| call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" () |
| call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" () |
| call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" () |
| call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" () |
| |
| %outptr = getelementptr <1280 x i32>, ptr addrspace(1) %out, i32 %tid |
| store <1280 x i32> %a, ptr addrspace(1) %outptr |
| |
| ret void |
| } |
| |
| define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { |
| ; GFX6-LABEL: test_limited_sgpr: |
| ; GFX6: ; %bb.0: ; %entry |
| ; GFX6-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 |
| ; GFX6-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 |
| ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GFX6-NEXT: s_mov_b32 s42, -1 |
| ; GFX6-NEXT: s_mov_b32 s43, 0xe8f000 |
| ; GFX6-NEXT: s_add_u32 s40, s40, s11 |
| ; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 |
| ; GFX6-NEXT: s_addc_u32 s41, s41, 0 |
| ; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, -1, v0 |
| ; GFX6-NEXT: v_mov_b32_e32 v6, 0 |
| ; GFX6-NEXT: s_mov_b64 s[4:5], exec |
| ; GFX6-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-NEXT: s_mov_b64 exec, 15 |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v1, s0, 0 |
| ; GFX6-NEXT: v_writelane_b32 v1, s1, 1 |
| ; GFX6-NEXT: v_writelane_b32 v1, s2, 2 |
| ; GFX6-NEXT: v_writelane_b32 v1, s3, 3 |
| ; GFX6-NEXT: s_mov_b32 s8, 0x80400 |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 8, v0 |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:240 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x86a00 |
| ; GFX6-NEXT: s_mov_b64 s[8:9], exec |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:224 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x86600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:208 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x86200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:192 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x85e00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:176 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x85a00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:160 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x85600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:144 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x85200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:128 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x84e00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:112 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x84a00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:96 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x84600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:80 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x84200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:64 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x83a00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 |
| ; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:16 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x83200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:32 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x83600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GFX6-NEXT: s_mov_b64 exec, 15 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v4, s0, 0 |
| ; GFX6-NEXT: v_writelane_b32 v4, s1, 1 |
| ; GFX6-NEXT: v_writelane_b32 v4, s2, 2 |
| ; GFX6-NEXT: v_writelane_b32 v4, s3, 3 |
| ; GFX6-NEXT: s_mov_b32 s10, 0x80800 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s10 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[8:9] |
| ; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:48 |
| ; GFX6-NEXT: s_mov_b32 s0, 0x83e00 |
| ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 13, v0 |
| ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 16, v4 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s0 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Spill |
| ; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(3) |
| ; GFX6-NEXT: v_mov_b32_e32 v7, 1 |
| ; GFX6-NEXT: s_mov_b64 s[0:1], exec |
| ; GFX6-NEXT: buffer_store_dword v7, v4, s[40:43], 0 offen |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; def s[4:11] |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v4, s4, 0 |
| ; GFX6-NEXT: v_writelane_b32 v4, s5, 1 |
| ; GFX6-NEXT: v_writelane_b32 v4, s6, 2 |
| ; GFX6-NEXT: v_writelane_b32 v4, s7, 3 |
| ; GFX6-NEXT: v_writelane_b32 v4, s8, 4 |
| ; GFX6-NEXT: v_writelane_b32 v4, s9, 5 |
| ; GFX6-NEXT: v_writelane_b32 v4, s10, 6 |
| ; GFX6-NEXT: v_writelane_b32 v4, s11, 7 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x80c00 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; def s[8:15] |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; def s[16:23] |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; def s[24:31] |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; def s[0:3] |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; def s[4:5] |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; def s33 |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: s_and_saveexec_b64 s[6:7], vcc |
| ; GFX6-NEXT: s_mov_b64 vcc, s[6:7] |
| ; GFX6-NEXT: s_cbranch_execz .LBB1_2 |
| ; GFX6-NEXT: ; %bb.1: ; %bb0 |
| ; GFX6-NEXT: s_mov_b64 s[6:7], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v4, s8, 0 |
| ; GFX6-NEXT: v_writelane_b32 v4, s9, 1 |
| ; GFX6-NEXT: v_writelane_b32 v4, s10, 2 |
| ; GFX6-NEXT: v_writelane_b32 v4, s11, 3 |
| ; GFX6-NEXT: v_writelane_b32 v4, s12, 4 |
| ; GFX6-NEXT: v_writelane_b32 v4, s13, 5 |
| ; GFX6-NEXT: v_writelane_b32 v4, s14, 6 |
| ; GFX6-NEXT: v_writelane_b32 v4, s15, 7 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x81400 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[6:7], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x80c00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s8, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s9, v4, 1 |
| ; GFX6-NEXT: v_readlane_b32 s10, v4, 2 |
| ; GFX6-NEXT: v_readlane_b32 s11, v4, 3 |
| ; GFX6-NEXT: v_readlane_b32 s12, v4, 4 |
| ; GFX6-NEXT: v_readlane_b32 s13, v4, 5 |
| ; GFX6-NEXT: v_readlane_b32 s14, v4, 6 |
| ; GFX6-NEXT: v_readlane_b32 s15, v4, 7 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[6:7], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v4, s16, 0 |
| ; GFX6-NEXT: v_writelane_b32 v4, s17, 1 |
| ; GFX6-NEXT: v_writelane_b32 v4, s18, 2 |
| ; GFX6-NEXT: v_writelane_b32 v4, s19, 3 |
| ; GFX6-NEXT: v_writelane_b32 v4, s20, 4 |
| ; GFX6-NEXT: v_writelane_b32 v4, s21, 5 |
| ; GFX6-NEXT: v_writelane_b32 v4, s22, 6 |
| ; GFX6-NEXT: v_writelane_b32 v4, s23, 7 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x81c00 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[6:7], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x81400 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s16, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s17, v4, 1 |
| ; GFX6-NEXT: v_readlane_b32 s18, v4, 2 |
| ; GFX6-NEXT: v_readlane_b32 s19, v4, 3 |
| ; GFX6-NEXT: v_readlane_b32 s20, v4, 4 |
| ; GFX6-NEXT: v_readlane_b32 s21, v4, 5 |
| ; GFX6-NEXT: v_readlane_b32 s22, v4, 6 |
| ; GFX6-NEXT: v_readlane_b32 s23, v4, 7 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[6:7], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v4, s24, 0 |
| ; GFX6-NEXT: v_writelane_b32 v4, s25, 1 |
| ; GFX6-NEXT: v_writelane_b32 v4, s26, 2 |
| ; GFX6-NEXT: v_writelane_b32 v4, s27, 3 |
| ; GFX6-NEXT: v_writelane_b32 v4, s28, 4 |
| ; GFX6-NEXT: v_writelane_b32 v4, s29, 5 |
| ; GFX6-NEXT: v_writelane_b32 v4, s30, 6 |
| ; GFX6-NEXT: v_writelane_b32 v4, s31, 7 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x82400 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[6:7], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x81c00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s24, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s25, v4, 1 |
| ; GFX6-NEXT: v_readlane_b32 s26, v4, 2 |
| ; GFX6-NEXT: v_readlane_b32 s27, v4, 3 |
| ; GFX6-NEXT: v_readlane_b32 s28, v4, 4 |
| ; GFX6-NEXT: v_readlane_b32 s29, v4, 5 |
| ; GFX6-NEXT: v_readlane_b32 s30, v4, 6 |
| ; GFX6-NEXT: v_readlane_b32 s31, v4, 7 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[6:7], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 15 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v4, s0, 0 |
| ; GFX6-NEXT: v_writelane_b32 v4, s1, 1 |
| ; GFX6-NEXT: v_writelane_b32 v4, s2, 2 |
| ; GFX6-NEXT: v_writelane_b32 v4, s3, 3 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x82c00 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX6-NEXT: s_mov_b64 s[0:1], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 3 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: v_writelane_b32 v4, s4, 0 |
| ; GFX6-NEXT: v_writelane_b32 v4, s5, 1 |
| ; GFX6-NEXT: s_mov_b32 s2, 0x83000 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX6-NEXT: s_mov_b64 s[34:35], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 0xff |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_mov_b32 s36, 0x82400 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s36 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s0, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s1, v4, 1 |
| ; GFX6-NEXT: v_readlane_b32 s2, v4, 2 |
| ; GFX6-NEXT: v_readlane_b32 s3, v4, 3 |
| ; GFX6-NEXT: v_readlane_b32 s4, v4, 4 |
| ; GFX6-NEXT: v_readlane_b32 s5, v4, 5 |
| ; GFX6-NEXT: v_readlane_b32 s6, v4, 6 |
| ; GFX6-NEXT: v_readlane_b32 s7, v4, 7 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX6-NEXT: s_mov_b64 s[34:35], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 15 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_mov_b32 s44, 0x82c00 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s44 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s36, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s37, v4, 1 |
| ; GFX6-NEXT: v_readlane_b32 s38, v4, 2 |
| ; GFX6-NEXT: v_readlane_b32 s39, v4, 3 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX6-NEXT: s_mov_b64 s[44:45], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 3 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: v_mov_b32_e32 v7, 0x20c0 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s34, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s35, v4, 1 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[44:45] |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35] |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: .LBB1_2: ; %ret |
| ; GFX6-NEXT: s_or_b64 exec, exec, vcc |
| ; GFX6-NEXT: s_mov_b64 s[4:5], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 15 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_mov_b32 s6, 0x80400 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s0, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s1, v4, 1 |
| ; GFX6-NEXT: v_readlane_b32 s2, v4, 2 |
| ; GFX6-NEXT: v_readlane_b32 s3, v4, 3 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX6-NEXT: s_mov_b64 s[36:37], s[0:1] |
| ; GFX6-NEXT: s_mov_b64 s[4:5], exec |
| ; GFX6-NEXT: s_mov_b64 exec, 15 |
| ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_mov_b32 s6, 0x80800 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: v_readlane_b32 s0, v4, 0 |
| ; GFX6-NEXT: v_readlane_b32 s1, v4, 1 |
| ; GFX6-NEXT: v_readlane_b32 s2, v4, 2 |
| ; GFX6-NEXT: v_readlane_b32 s3, v4, 3 |
| ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX6-NEXT: s_mov_b32 s0, 0x86a00 |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b64 s[38:39], s[2:3] |
| ; GFX6-NEXT: s_mov_b32 s0, 0x86600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:240 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x86200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:224 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x85e00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:208 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x85a00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:192 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x85600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:176 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x85200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:160 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x84e00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:144 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x84a00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:128 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x84600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:112 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x84200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:96 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x83a00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:80 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x83e00 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:64 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x83600 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:48 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_mov_b32 s0, 0x83200 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:32 |
| ; GFX6-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload |
| ; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:16 |
| ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[36:39], 0 addr64 |
| ; GFX6-NEXT: s_endpgm |
| ; |
| ; GFX9-FLATSCR-LABEL: test_limited_sgpr: |
| ; GFX9-FLATSCR: ; %bb.0: ; %entry |
| ; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 |
| ; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 |
| ; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 |
| ; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 8, v0 |
| ; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:240 |
| ; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2050 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 16 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:224 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2040 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:208 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2030 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[19:22], v5, s[38:39] offset:192 |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[15:18], v5, s[38:39] offset:176 |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:160 |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v5, s[38:39] offset:144 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2020 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2070 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v5, s[38:39] offset:128 |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:112 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20c0 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:96 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20b0 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:80 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20a0 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:64 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2090 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] offset:48 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2080 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[11:14], v5, s[38:39] offset:32 |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v5, s[38:39] offset:16 |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2060 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[7:10], v5, s[38:39] |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 1 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: v_lshl_add_u32 v4, v7, 13, v4 |
| ; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 |
| ; GFX9-FLATSCR-NEXT: scratch_store_dword v4, v6, off |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; def s[0:7] |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; def s[8:15] |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; def s[16:23] |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; def s[24:31] |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; def s[40:43] |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; def s[38:39] |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; def s33 |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[34:35], vcc |
| ; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB1_2 |
| ; GFX9-FLATSCR-NEXT: ; %bb.1: ; %bb0 |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[38:39] |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20d0 |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20e0 |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20f0 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v11 |
| ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 ; 16-byte Folded Spill |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v12 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v13 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v14 |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20e0 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20d0 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v3 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v2 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, v1 |
| ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, v0 |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX9-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX9-FLATSCR-NEXT: .LBB1_2: ; %ret |
| ; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[34:35] |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20c0 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20b0 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:112 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20a0 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:96 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2090 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:80 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2080 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:64 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2060 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:48 |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[11:14], s[36:37] offset:32 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2050 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[11:14], s[36:37] offset:16 |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[7:10], s[36:37] |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2040 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[6:9], s[36:37] offset:240 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2030 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[6:9], s[36:37] offset:224 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2020 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[6:9], s[36:37] offset:208 |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[19:22], s[36:37] offset:192 |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[15:18], s[36:37] offset:176 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2070 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:160 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:144 |
| ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload |
| ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] offset:128 |
| ; GFX9-FLATSCR-NEXT: s_endpgm |
| ; |
| ; GFX10-FLATSCR-LABEL: test_limited_sgpr: |
| ; GFX10-FLATSCR: ; %bb.0: ; %entry |
| ; GFX10-FLATSCR-NEXT: s_add_u32 s8, s8, s13 |
| ; GFX10-FLATSCR-NEXT: s_addc_u32 s9, s9, 0 |
| ; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 |
| ; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 |
| ; GFX10-FLATSCR-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 |
| ; GFX10-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 1 |
| ; GFX10-FLATSCR-NEXT: s_mov_b32 s33, exec_lo |
| ; GFX10-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 |
| ; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 8, v0 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-FLATSCR-NEXT: s_clause 0xf |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[35:38], v5, s[38:39] offset:240 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[31:34], v5, s[38:39] offset:224 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[27:30], v5, s[38:39] offset:208 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v5, s[38:39] offset:192 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[19:22], v5, s[38:39] offset:176 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[15:18], v5, s[38:39] offset:160 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[11:14], v5, s[38:39] offset:144 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[7:10], v5, s[38:39] offset:128 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[63:66], v5, s[38:39] offset:112 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[59:62], v5, s[38:39] offset:96 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[55:58], v5, s[38:39] offset:80 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[51:54], v5, s[38:39] offset:64 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[47:50], v5, s[38:39] offset:48 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[43:46], v5, s[38:39] offset:32 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[39:42], v5, s[38:39] offset:16 |
| ; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[38:39] |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: v_lshl_add_u32 v4, v0, 13, 16 |
| ; GFX10-FLATSCR-NEXT: scratch_store_dword v4, v6, off |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; def s[0:7] |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; def s[8:15] |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; def s[16:23] |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; def s[24:31] |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; def s[40:43] |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; def s[34:35] |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; def s38 |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: v_cmpx_eq_u32_e32 0, v0 |
| ; GFX10-FLATSCR-NEXT: s_cbranch_execz .LBB1_2 |
| ; GFX10-FLATSCR-NEXT: ; %bb.1: ; %bb0 |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[34:35] |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v88, v58 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v92, v62 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v87, v57 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v86, v56 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v85, v55 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v91, v61 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v90, v60 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v89, v59 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v60, v34 |
| ; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[63:66], s0 ; 16-byte Folded Spill |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v68, v38 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v59, v33 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v58, v32 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v57, v31 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v67, v37 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v66, v36 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v65, v35 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v10 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v72, v42 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v76, v46 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v80, v50 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v84, v54 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v7 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v71, v41 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v70, v40 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v69, v39 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v40, v14 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v75, v45 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v74, v44 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v73, v43 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v44, v18 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v79, v49 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v78, v48 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v77, v47 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v48, v22 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v83, v53 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v82, v52 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v81, v51 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v52, v26 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v56, v30 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v9 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v8 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v11 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v41, v15 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v45, v19 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v49, v23 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v53, v27 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v39, v13 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v12 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v43, v17 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v42, v16 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v47, v21 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v46, v20 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v51, v25 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v50, v24 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v55, v29 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v54, v28 |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, v33 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, v53 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v49 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v45 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v41 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, v37 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, v34 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v35 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v36 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v57 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v54 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v55 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v56 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v50 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v51 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v52 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v46 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v47 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v48 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v42 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, v43 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v44 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, v38 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v39 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v40 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v58 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v59 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v60 |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v65 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v66 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v67 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v68 |
| ; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[63:66], off, s0 ; 16-byte Folded Reload |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v59, v89 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v55, v85 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v51, v81 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v47, v77 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v43, v73 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v39, v69 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v60, v90 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v61, v91 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v62, v92 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v56, v86 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v57, v87 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v58, v88 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v52, v82 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v53, v83 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v54, v84 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v48, v78 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v49, v79 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v50, v80 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v44, v74 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v45, v75 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v46, v76 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v40, v70 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v41, v71 |
| ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v42, v72 |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: ;;#ASMSTART |
| ; GFX10-FLATSCR-NEXT: ;;#ASMEND |
| ; GFX10-FLATSCR-NEXT: .LBB1_2: ; %ret |
| ; GFX10-FLATSCR-NEXT: s_or_b32 exec_lo, exec_lo, s33 |
| ; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[63:66], s[36:37] offset:112 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[59:62], s[36:37] offset:96 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[55:58], s[36:37] offset:80 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[51:54], s[36:37] offset:64 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[47:50], s[36:37] offset:48 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[43:46], s[36:37] offset:32 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[39:42], s[36:37] offset:16 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[36:37] |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[35:38], s[36:37] offset:240 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[31:34], s[36:37] offset:224 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[27:30], s[36:37] offset:208 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[23:26], s[36:37] offset:192 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[19:22], s[36:37] offset:176 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[15:18], s[36:37] offset:160 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[11:14], s[36:37] offset:144 |
| ; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[7:10], s[36:37] offset:128 |
| ; GFX10-FLATSCR-NEXT: s_endpgm |
| entry: |
| %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) |
| |
| ; allocate enough scratch to go beyond 2^12 addressing |
| %scratch = alloca <1280 x i32>, align 16, addrspace(5) |
| |
| ; load VGPR data |
| %aptr = getelementptr <64 x i32>, ptr addrspace(1) %in, i32 %tid |
| %a = load <64 x i32>, ptr addrspace(1) %aptr |
| |
| ; make sure scratch is used |
| %x = extractelement <64 x i32> %a, i32 0 |
| %sptr0 = getelementptr <1280 x i32>, ptr addrspace(5) %scratch, i32 %x, i32 0 |
| store i32 1, ptr addrspace(5) %sptr0 |
| |
| ; fill up SGPRs |
| %sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () |
| %sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () |
| %sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () |
| %sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () |
| %sgpr4 = call <4 x i32> asm sideeffect "; def $0", "=s" () |
| %sgpr5 = call <2 x i32> asm sideeffect "; def $0", "=s" () |
| %sgpr6 = call i32 asm sideeffect "; def $0", "=s" () |
| |
| %cmp = icmp eq i32 %x, 0 |
| br i1 %cmp, label %bb0, label %ret |
| |
| bb0: |
| ; create SGPR pressure |
| call void asm sideeffect "; use $0,$1,$2,$3,$4,$5", "s,s,s,s,s,s,s"(<8 x i32> %sgpr0, <8 x i32> %sgpr1, <8 x i32> %sgpr2, <8 x i32> %sgpr3, <4 x i32> %sgpr4, <2 x i32> %sgpr5, i32 %sgpr6) |
| |
| ; clobber every fourth VGPR (v4, v8, ..., v224) to increase register pressure |
| call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" () |
| call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" () |
| call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" () |
| call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" () |
| call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" () |
| call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" () |
| call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" () |
| br label %ret |
| |
| ret: |
| %outptr = getelementptr <64 x i32>, ptr addrspace(1) %out, i32 %tid |
| store <64 x i32> %a, ptr addrspace(1) %outptr |
| |
| ret void |
| } |
| |
| ; Intrinsics called by the kernel above to compute %tid; the result of |
| ; mbcnt.lo is passed as the second operand of mbcnt.hi. |
| declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 |
| declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 |
| |
| ; #0 carries the "amdgpu-waves-per-eu" string attribute with value "10,10" for |
| ; any function that references it (presumably pinning both the minimum and the |
| ; maximum waves per execution unit to 10; confirm against the AMDGPU backend |
| ; documentation). |
| attributes #0 = { "amdgpu-waves-per-eu"="10,10" } |
| ; #1 is the attribute group attached to the two intrinsic declarations above. |
| attributes #1 = { nounwind readnone } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; CHECK: {{.*}} |
| ; FLATSCR: {{.*}} |