| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 -O0 < %s | FileCheck -check-prefix=GFX900 %s |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx906 -O0 < %s | FileCheck -check-prefix=GFX906 %s |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx908 -O0 < %s | FileCheck -check-prefix=GFX908 %s |
| ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx90a -O0 < %s | FileCheck -check-prefix=GFX90a %s |
| |
| ; This test used to crash for gfx908 while allocating the tuple. Compared to the other subtargets, |
| ; gfx908 reserves an extra VGPR for AGPR to VGPR copies, which adds register pressure. |
| ; To minimize the register pressure, the VGPRs used for CSR SGPR spilling have been picked from the |
| ; higher available range, thereby leaving more VGPRs available in the lowest range for allocation. |
| |
| ; test_tuple: takes a single <16 x i64> argument and returns the constant i32 0. |
| ; The GFX900/GFX906/GFX908/GFX90a comment blocks inside the body are the FileCheck |
| ; expectations for the four RUN lines of this file (one check prefix per -mcpu value). |
| define i32 @test_tuple(<16 x i64> %0) { |
| ; GFX900-LABEL: test_tuple: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill |
| ; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| ; GFX900-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX900-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX900-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX900-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX900-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX900-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX900-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX900-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX900-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX900-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX900-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX900-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX900-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX900-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX900-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX900-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX900-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX900-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX900-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX900-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX900-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX900-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX900-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX900-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX900-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX900-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX900-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX900-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX900-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX900-NEXT: v_mov_b32_e32 v62, v1 |
| ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX900-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX900-NEXT: v_mov_b32_e32 v1, v62 |
| ; GFX900-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX900-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX900-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX900-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX900-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX900-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX900-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX900-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX900-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX900-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX900-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX900-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX900-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX900-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX900-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX900-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX900-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX900-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX900-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX900-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX900-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX900-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX900-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX900-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX900-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX900-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX900-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX900-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX900-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX900-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX900-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload |
| ; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX906-LABEL: test_tuple: |
| ; GFX906: ; %bb.0: |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| ; GFX906-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX906-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX906-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX906-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX906-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX906-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX906-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX906-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX906-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX906-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX906-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX906-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX906-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX906-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX906-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX906-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX906-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX906-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX906-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX906-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX906-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX906-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX906-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX906-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX906-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX906-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX906-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX906-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX906-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX906-NEXT: v_mov_b32_e32 v62, v1 |
| ; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX906-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX906-NEXT: v_mov_b32_e32 v1, v62 |
| ; GFX906-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX906-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX906-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX906-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX906-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX906-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX906-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX906-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX906-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX906-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX906-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX906-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX906-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX906-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX906-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX906-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX906-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX906-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX906-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX906-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX906-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX906-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX906-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX906-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX906-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX906-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX906-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX906-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX906-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX906-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX906-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX906-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX908-LABEL: test_tuple: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX908-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse |
| ; GFX908-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX908-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX908-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX908-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX908-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX908-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX908-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX908-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX908-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX908-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX908-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX908-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX908-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX908-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX908-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX908-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX908-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX908-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX908-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX908-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX908-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX908-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX908-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX908-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX908-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX908-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX908-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX908-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX908-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX908-NEXT: v_mov_b32_e32 v62, v1 |
| ; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX908-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX908-NEXT: v_mov_b32_e32 v1, v62 |
| ; GFX908-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX908-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX908-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX908-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX908-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX908-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX908-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX908-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX908-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX908-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX908-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX908-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX908-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX908-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX908-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX908-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX908-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX908-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX908-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX908-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX908-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX908-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX908-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX908-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX908-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX908-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX908-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX908-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX908-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX908-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX908-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse |
| ; GFX908-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90a-LABEL: test_tuple: |
| ; GFX90a: ; %bb.0: |
| ; GFX90a-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse |
| ; GFX90a-NEXT: v_mov_b32_e32 v33, v30 |
| ; GFX90a-NEXT: v_mov_b32_e32 v34, v29 |
| ; GFX90a-NEXT: v_mov_b32_e32 v35, v28 |
| ; GFX90a-NEXT: v_mov_b32_e32 v36, v27 |
| ; GFX90a-NEXT: v_mov_b32_e32 v37, v26 |
| ; GFX90a-NEXT: v_mov_b32_e32 v38, v25 |
| ; GFX90a-NEXT: v_mov_b32_e32 v39, v24 |
| ; GFX90a-NEXT: v_mov_b32_e32 v48, v23 |
| ; GFX90a-NEXT: v_mov_b32_e32 v49, v22 |
| ; GFX90a-NEXT: v_mov_b32_e32 v50, v21 |
| ; GFX90a-NEXT: v_mov_b32_e32 v51, v20 |
| ; GFX90a-NEXT: v_mov_b32_e32 v52, v19 |
| ; GFX90a-NEXT: v_mov_b32_e32 v53, v18 |
| ; GFX90a-NEXT: v_mov_b32_e32 v54, v17 |
| ; GFX90a-NEXT: v_mov_b32_e32 v55, v16 |
| ; GFX90a-NEXT: v_mov_b32_e32 v40, v15 |
| ; GFX90a-NEXT: v_mov_b32_e32 v41, v14 |
| ; GFX90a-NEXT: v_mov_b32_e32 v42, v13 |
| ; GFX90a-NEXT: v_mov_b32_e32 v43, v12 |
| ; GFX90a-NEXT: v_mov_b32_e32 v44, v11 |
| ; GFX90a-NEXT: v_mov_b32_e32 v45, v10 |
| ; GFX90a-NEXT: v_mov_b32_e32 v46, v9 |
| ; GFX90a-NEXT: v_mov_b32_e32 v47, v8 |
| ; GFX90a-NEXT: v_mov_b32_e32 v56, v7 |
| ; GFX90a-NEXT: v_mov_b32_e32 v57, v6 |
| ; GFX90a-NEXT: v_mov_b32_e32 v58, v5 |
| ; GFX90a-NEXT: v_mov_b32_e32 v59, v4 |
| ; GFX90a-NEXT: v_mov_b32_e32 v60, v3 |
| ; GFX90a-NEXT: v_mov_b32_e32 v61, v2 |
| ; GFX90a-NEXT: v_mov_b32_e32 v62, v1 |
| ; GFX90a-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX90a-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec |
| ; GFX90a-NEXT: v_mov_b32_e32 v1, v62 |
| ; GFX90a-NEXT: v_mov_b32_e32 v2, v61 |
| ; GFX90a-NEXT: v_mov_b32_e32 v3, v60 |
| ; GFX90a-NEXT: v_mov_b32_e32 v4, v59 |
| ; GFX90a-NEXT: v_mov_b32_e32 v5, v58 |
| ; GFX90a-NEXT: v_mov_b32_e32 v6, v57 |
| ; GFX90a-NEXT: v_mov_b32_e32 v7, v56 |
| ; GFX90a-NEXT: v_mov_b32_e32 v8, v47 |
| ; GFX90a-NEXT: v_mov_b32_e32 v9, v46 |
| ; GFX90a-NEXT: v_mov_b32_e32 v10, v45 |
| ; GFX90a-NEXT: v_mov_b32_e32 v11, v44 |
| ; GFX90a-NEXT: v_mov_b32_e32 v12, v43 |
| ; GFX90a-NEXT: v_mov_b32_e32 v13, v42 |
| ; GFX90a-NEXT: v_mov_b32_e32 v14, v41 |
| ; GFX90a-NEXT: v_mov_b32_e32 v15, v40 |
| ; GFX90a-NEXT: v_mov_b32_e32 v16, v55 |
| ; GFX90a-NEXT: v_mov_b32_e32 v17, v54 |
| ; GFX90a-NEXT: v_mov_b32_e32 v18, v53 |
| ; GFX90a-NEXT: v_mov_b32_e32 v19, v52 |
| ; GFX90a-NEXT: v_mov_b32_e32 v20, v51 |
| ; GFX90a-NEXT: v_mov_b32_e32 v21, v50 |
| ; GFX90a-NEXT: v_mov_b32_e32 v22, v49 |
| ; GFX90a-NEXT: v_mov_b32_e32 v23, v48 |
| ; GFX90a-NEXT: v_mov_b32_e32 v24, v39 |
| ; GFX90a-NEXT: v_mov_b32_e32 v25, v38 |
| ; GFX90a-NEXT: v_mov_b32_e32 v26, v37 |
| ; GFX90a-NEXT: v_mov_b32_e32 v27, v36 |
| ; GFX90a-NEXT: v_mov_b32_e32 v28, v35 |
| ; GFX90a-NEXT: v_mov_b32_e32 v29, v34 |
| ; GFX90a-NEXT: v_mov_b32_e32 v30, v33 |
| ; GFX90a-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec |
| ; GFX90a-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse |
| ; GFX90a-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse |
| ; GFX90a-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90a-NEXT: s_setpc_b64 s[30:31] |
| ; Mask indices 8..15 pick elements 8..15 of %0 (the first operand), giving an <8 x i64>. |
| ; %2 is never used afterwards. NOTE(review): presumably kept only to reproduce the tuple |
| ; allocation described in the file header - confirm before simplifying. |
| %2 = shufflevector <16 x i64> %0, <16 x i64> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ret i32 0 |
| } |