| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX900 %s |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 %s |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx908 -O0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx90a -O0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90a %s |
| |
| ; This test used to crash for gfx908 while allocating the tuple. Compared to the other subtargets, |
| ; gfx908 reserves an extra VGPR for AGPR-to-VGPR copies, which increases register pressure. |
| ; To reduce that pressure, the VGPRs used for CSR SGPR spilling are picked from the higher |
| ; available range, thereby leaving more VGPRs in the lowest range available for allocation. |
| |
| define i32 @test_tuple(<16 x i64> %0) { |
| ; GFX900-LABEL: test_tuple: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| ; GFX900-NEXT: v_writelane_b32 v63, s36, 0 |
| ; GFX900-NEXT: v_writelane_b32 v63, s37, 1 |
| ; GFX900-NEXT: v_writelane_b32 v63, s38, 2 |
| ; GFX900-NEXT: v_writelane_b32 v63, s39, 3 |
| ; GFX900-NEXT: v_writelane_b32 v63, s48, 4 |
| ; GFX900-NEXT: v_writelane_b32 v63, s49, 5 |
| ; GFX900-NEXT: v_writelane_b32 v63, s50, 6 |
| ; GFX900-NEXT: v_writelane_b32 v63, s51, 7 |
| ; GFX900-NEXT: v_writelane_b32 v63, s52, 8 |
| ; GFX900-NEXT: v_writelane_b32 v63, s53, 9 |
| ; GFX900-NEXT: v_writelane_b32 v63, s54, 10 |
| ; GFX900-NEXT: v_writelane_b32 v63, s55, 11 |
| ; GFX900-NEXT: v_writelane_b32 v63, s64, 12 |
| ; GFX900-NEXT: v_writelane_b32 v63, s65, 13 |
| ; GFX900-NEXT: v_writelane_b32 v63, s66, 14 |
| ; GFX900-NEXT: v_writelane_b32 v63, s67, 15 |
| ; GFX900-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX900-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX900-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX900-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX900-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX900-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX900-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX900-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX900-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX900-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX900-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX900-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX900-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX900-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX900-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX900-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX900-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX900-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX900-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX900-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX900-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX900-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX900-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX900-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX900-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX900-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX900-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX900-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX900-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX900-NEXT: v_mov_b32_e32 v62, v1 |
| ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; implicit-def: $sgpr4 |
| ; GFX900-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX900-NEXT: v_mov_b32_e32 v1, v62 |
| ; GFX900-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX900-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX900-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX900-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX900-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX900-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX900-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX900-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX900-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX900-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX900-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX900-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX900-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX900-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX900-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX900-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX900-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX900-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX900-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX900-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX900-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX900-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX900-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX900-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX900-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX900-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX900-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX900-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX900-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX900-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX900-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 |
| ; GFX900-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX900-NEXT: v_readlane_b32 s67, v63, 15 |
| ; GFX900-NEXT: v_readlane_b32 s66, v63, 14 |
| ; GFX900-NEXT: v_readlane_b32 s65, v63, 13 |
| ; GFX900-NEXT: v_readlane_b32 s64, v63, 12 |
| ; GFX900-NEXT: v_readlane_b32 s55, v63, 11 |
| ; GFX900-NEXT: v_readlane_b32 s54, v63, 10 |
| ; GFX900-NEXT: v_readlane_b32 s53, v63, 9 |
| ; GFX900-NEXT: v_readlane_b32 s52, v63, 8 |
| ; GFX900-NEXT: v_readlane_b32 s51, v63, 7 |
| ; GFX900-NEXT: v_readlane_b32 s50, v63, 6 |
| ; GFX900-NEXT: v_readlane_b32 s49, v63, 5 |
| ; GFX900-NEXT: v_readlane_b32 s48, v63, 4 |
| ; GFX900-NEXT: v_readlane_b32 s39, v63, 3 |
| ; GFX900-NEXT: v_readlane_b32 s38, v63, 2 |
| ; GFX900-NEXT: v_readlane_b32 s37, v63, 1 |
| ; GFX900-NEXT: v_readlane_b32 s36, v63, 0 |
| ; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX906-LABEL: test_tuple: |
| ; GFX906: ; %bb.0: |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX906-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill |
| ; GFX906-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| ; GFX906-NEXT: v_writelane_b32 v63, s36, 0 |
| ; GFX906-NEXT: v_writelane_b32 v63, s37, 1 |
| ; GFX906-NEXT: v_writelane_b32 v63, s38, 2 |
| ; GFX906-NEXT: v_writelane_b32 v63, s39, 3 |
| ; GFX906-NEXT: v_writelane_b32 v63, s48, 4 |
| ; GFX906-NEXT: v_writelane_b32 v63, s49, 5 |
| ; GFX906-NEXT: v_writelane_b32 v63, s50, 6 |
| ; GFX906-NEXT: v_writelane_b32 v63, s51, 7 |
| ; GFX906-NEXT: v_writelane_b32 v63, s52, 8 |
| ; GFX906-NEXT: v_writelane_b32 v63, s53, 9 |
| ; GFX906-NEXT: v_writelane_b32 v63, s54, 10 |
| ; GFX906-NEXT: v_writelane_b32 v63, s55, 11 |
| ; GFX906-NEXT: v_writelane_b32 v63, s64, 12 |
| ; GFX906-NEXT: v_writelane_b32 v63, s65, 13 |
| ; GFX906-NEXT: v_writelane_b32 v63, s66, 14 |
| ; GFX906-NEXT: v_writelane_b32 v63, s67, 15 |
| ; GFX906-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX906-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX906-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX906-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX906-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX906-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX906-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX906-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX906-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX906-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX906-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX906-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX906-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX906-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX906-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX906-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX906-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX906-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX906-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX906-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX906-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX906-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX906-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX906-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX906-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX906-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX906-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX906-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX906-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX906-NEXT: v_mov_b32_e32 v62, v1 |
| ; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; implicit-def: $sgpr4 |
| ; GFX906-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX906-NEXT: v_mov_b32_e32 v1, v62 |
| ; GFX906-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX906-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX906-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX906-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX906-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX906-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX906-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX906-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX906-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX906-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX906-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX906-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX906-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX906-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX906-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX906-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX906-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX906-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX906-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX906-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX906-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX906-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX906-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX906-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX906-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX906-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX906-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX906-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX906-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX906-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX906-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 |
| ; GFX906-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-NEXT: v_readlane_b32 s67, v63, 15 |
| ; GFX906-NEXT: v_readlane_b32 s66, v63, 14 |
| ; GFX906-NEXT: v_readlane_b32 s65, v63, 13 |
| ; GFX906-NEXT: v_readlane_b32 s64, v63, 12 |
| ; GFX906-NEXT: v_readlane_b32 s55, v63, 11 |
| ; GFX906-NEXT: v_readlane_b32 s54, v63, 10 |
| ; GFX906-NEXT: v_readlane_b32 s53, v63, 9 |
| ; GFX906-NEXT: v_readlane_b32 s52, v63, 8 |
| ; GFX906-NEXT: v_readlane_b32 s51, v63, 7 |
| ; GFX906-NEXT: v_readlane_b32 s50, v63, 6 |
| ; GFX906-NEXT: v_readlane_b32 s49, v63, 5 |
| ; GFX906-NEXT: v_readlane_b32 s48, v63, 4 |
| ; GFX906-NEXT: v_readlane_b32 s39, v63, 3 |
| ; GFX906-NEXT: v_readlane_b32 s38, v63, 2 |
| ; GFX906-NEXT: v_readlane_b32 s37, v63, 1 |
| ; GFX906-NEXT: v_readlane_b32 s36, v63, 0 |
| ; GFX906-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX906-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX908-LABEL: test_tuple: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX908-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX908-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX908-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse |
| ; GFX908-NEXT: v_writelane_b32 v62, s36, 0 |
| ; GFX908-NEXT: v_writelane_b32 v62, s37, 1 |
| ; GFX908-NEXT: v_writelane_b32 v62, s38, 2 |
| ; GFX908-NEXT: v_writelane_b32 v62, s39, 3 |
| ; GFX908-NEXT: v_writelane_b32 v62, s48, 4 |
| ; GFX908-NEXT: v_writelane_b32 v62, s49, 5 |
| ; GFX908-NEXT: v_writelane_b32 v62, s50, 6 |
| ; GFX908-NEXT: v_writelane_b32 v62, s51, 7 |
| ; GFX908-NEXT: v_writelane_b32 v62, s52, 8 |
| ; GFX908-NEXT: v_writelane_b32 v62, s53, 9 |
| ; GFX908-NEXT: v_writelane_b32 v62, s54, 10 |
| ; GFX908-NEXT: v_writelane_b32 v62, s55, 11 |
| ; GFX908-NEXT: v_writelane_b32 v62, s64, 12 |
| ; GFX908-NEXT: v_writelane_b32 v62, s65, 13 |
| ; GFX908-NEXT: v_writelane_b32 v62, s66, 14 |
| ; GFX908-NEXT: v_writelane_b32 v62, s67, 15 |
| ; GFX908-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX908-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX908-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX908-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX908-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX908-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX908-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX908-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX908-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX908-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX908-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX908-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX908-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX908-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX908-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX908-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX908-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX908-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX908-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX908-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX908-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX908-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX908-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX908-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX908-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX908-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX908-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX908-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX908-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX908-NEXT: v_mov_b32_e32 v32, v1 |
| ; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s32 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_accvgpr_write_b32 a14, v1 ; Reload Reuse |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; implicit-def: $sgpr4 |
| ; GFX908-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX908-NEXT: v_mov_b32_e32 v1, v32 |
| ; GFX908-NEXT: s_nop 1 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v32, a14 ; Reload Reuse |
| ; GFX908-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX908-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX908-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX908-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX908-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX908-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX908-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX908-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX908-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX908-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX908-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX908-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX908-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX908-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX908-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX908-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX908-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX908-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX908-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX908-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX908-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX908-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX908-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX908-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX908-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX908-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX908-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX908-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX908-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX908-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX908-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 |
| ; GFX908-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX908-NEXT: v_readlane_b32 s67, v62, 15 |
| ; GFX908-NEXT: v_readlane_b32 s66, v62, 14 |
| ; GFX908-NEXT: v_readlane_b32 s65, v62, 13 |
| ; GFX908-NEXT: v_readlane_b32 s64, v62, 12 |
| ; GFX908-NEXT: v_readlane_b32 s55, v62, 11 |
| ; GFX908-NEXT: v_readlane_b32 s54, v62, 10 |
| ; GFX908-NEXT: v_readlane_b32 s53, v62, 9 |
| ; GFX908-NEXT: v_readlane_b32 s52, v62, 8 |
| ; GFX908-NEXT: v_readlane_b32 s51, v62, 7 |
| ; GFX908-NEXT: v_readlane_b32 s50, v62, 6 |
| ; GFX908-NEXT: v_readlane_b32 s49, v62, 5 |
| ; GFX908-NEXT: v_readlane_b32 s48, v62, 4 |
| ; GFX908-NEXT: v_readlane_b32 s39, v62, 3 |
| ; GFX908-NEXT: v_readlane_b32 s38, v62, 2 |
| ; GFX908-NEXT: v_readlane_b32 s37, v62, 1 |
| ; GFX908-NEXT: v_readlane_b32 s36, v62, 0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse |
| ; GFX908-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX908-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90a-LABEL: test_tuple: |
| ; GFX90a: ; %bb.0: |
| ; GFX90a-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90a-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX90a-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| ; GFX90a-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse |
| ; GFX90a-NEXT: v_writelane_b32 v63, s36, 0 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s37, 1 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s38, 2 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s39, 3 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s48, 4 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s49, 5 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s50, 6 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s51, 7 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s52, 8 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s53, 9 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s54, 10 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s55, 11 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s64, 12 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s65, 13 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s66, 14 |
| ; GFX90a-NEXT: v_writelane_b32 v63, s67, 15 |
| ; GFX90a-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX90a-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX90a-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX90a-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX90a-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX90a-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX90a-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX90a-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX90a-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX90a-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX90a-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX90a-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX90a-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX90a-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX90a-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX90a-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX90a-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX90a-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX90a-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX90a-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX90a-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX90a-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX90a-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX90a-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX90a-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX90a-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX90a-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX90a-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX90a-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX90a-NEXT: v_mov_b32_e32 v62, v1 |
| ; GFX90a-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; implicit-def: $sgpr4 |
| ; GFX90a-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX90a-NEXT: v_mov_b32_e32 v1, v62 |
| ; GFX90a-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX90a-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX90a-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX90a-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX90a-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX90a-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX90a-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX90a-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX90a-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX90a-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX90a-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX90a-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX90a-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX90a-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX90a-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX90a-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX90a-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX90a-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX90a-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX90a-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX90a-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX90a-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX90a-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX90a-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX90a-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX90a-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX90a-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX90a-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX90a-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX90a-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX90a-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 |
| ; GFX90a-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX90a-NEXT: v_readlane_b32 s67, v63, 15 |
| ; GFX90a-NEXT: v_readlane_b32 s66, v63, 14 |
| ; GFX90a-NEXT: v_readlane_b32 s65, v63, 13 |
| ; GFX90a-NEXT: v_readlane_b32 s64, v63, 12 |
| ; GFX90a-NEXT: v_readlane_b32 s55, v63, 11 |
| ; GFX90a-NEXT: v_readlane_b32 s54, v63, 10 |
| ; GFX90a-NEXT: v_readlane_b32 s53, v63, 9 |
| ; GFX90a-NEXT: v_readlane_b32 s52, v63, 8 |
| ; GFX90a-NEXT: v_readlane_b32 s51, v63, 7 |
| ; GFX90a-NEXT: v_readlane_b32 s50, v63, 6 |
| ; GFX90a-NEXT: v_readlane_b32 s49, v63, 5 |
| ; GFX90a-NEXT: v_readlane_b32 s48, v63, 4 |
| ; GFX90a-NEXT: v_readlane_b32 s39, v63, 3 |
| ; GFX90a-NEXT: v_readlane_b32 s38, v63, 2 |
| ; GFX90a-NEXT: v_readlane_b32 s37, v63, 1 |
| ; GFX90a-NEXT: v_readlane_b32 s36, v63, 0 |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse |
| ; GFX90a-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; GFX90a-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| ; GFX90a-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX90a-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90a-NEXT: s_setpc_b64 s[30:31] |
| %2 = shufflevector <16 x i64> %0, <16 x i64> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ret i32 0 |
| } |