| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s |
| |
| ; Regression test ensuring that empty subranges are not prematurely eliminated. |
| |
| ; Masked-load intrinsic used by this test: takes (ptr, <16 x i1>, <16 x i63>) and returns <16 x i63>. |
| ; NOTE(review): per the LLVM LangRef the operands are presumably (base pointer, mask, passthru); |
| ; this form has no explicit alignment operand -- confirm against the LangRef for the LLVM version in use. |
| declare <16 x i63> @llvm.masked.load.v16i63.p0(ptr, <16 x i1>, <16 x i63>) |
| |
| define void @f(ptr %p, <16 x i1> %m, <16 x i63> %pt, <16 x i1> %sc, |
| ; CHECK-LABEL: f: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; CHECK-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:1632 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:956 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:952 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a33, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a34, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a35, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a36, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a37, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a38, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a39, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a40, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a41, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a42, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a43, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a44, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a45, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a46, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a47, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a48, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a49, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a50, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a51, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a52, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a53, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a54, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a55, off, s[0:3], s32 offset:828 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a56, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a57, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a58, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a59, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a60, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a61, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a62, off, s[0:3], s32 offset:800 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a63, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_writelane_b32 v63, s34, 0 |
| ; CHECK-NEXT: v_writelane_b32 v63, s35, 1 |
| ; CHECK-NEXT: v_writelane_b32 v63, s36, 2 |
| ; CHECK-NEXT: v_writelane_b32 v63, s37, 3 |
| ; CHECK-NEXT: v_writelane_b32 v63, s38, 4 |
| ; CHECK-NEXT: v_writelane_b32 v63, s39, 5 |
| ; CHECK-NEXT: v_writelane_b32 v63, s48, 6 |
| ; CHECK-NEXT: v_writelane_b32 v63, s49, 7 |
| ; CHECK-NEXT: v_writelane_b32 v63, s50, 8 |
| ; CHECK-NEXT: v_writelane_b32 v63, s51, 9 |
| ; CHECK-NEXT: v_writelane_b32 v63, s52, 10 |
| ; CHECK-NEXT: v_writelane_b32 v63, s53, 11 |
| ; CHECK-NEXT: v_writelane_b32 v63, s54, 12 |
| ; CHECK-NEXT: v_writelane_b32 v63, s55, 13 |
| ; CHECK-NEXT: v_writelane_b32 v63, s30, 14 |
| ; CHECK-NEXT: v_writelane_b32 v63, s31, 15 |
| ; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1264 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1268 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_mov_b32_e32 v30, v29 |
| ; CHECK-NEXT: buffer_load_dword a43, off, s[0:3], s32 offset:368 |
| ; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:364 |
| ; CHECK-NEXT: buffer_load_dword a41, off, s[0:3], s32 offset:360 |
| ; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:356 |
| ; CHECK-NEXT: buffer_load_dword a39, off, s[0:3], s32 offset:352 |
| ; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:348 |
| ; CHECK-NEXT: buffer_load_dword a37, off, s[0:3], s32 offset:344 |
| ; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:340 |
| ; CHECK-NEXT: buffer_load_dword a35, off, s[0:3], s32 offset:336 |
| ; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:332 |
| ; CHECK-NEXT: buffer_load_dword a33, off, s[0:3], s32 offset:328 |
| ; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:324 |
| ; CHECK-NEXT: buffer_load_dword a31, off, s[0:3], s32 offset:320 |
| ; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:316 |
| ; CHECK-NEXT: buffer_load_dword a29, off, s[0:3], s32 offset:312 |
| ; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:308 |
| ; CHECK-NEXT: buffer_load_dword a27, off, s[0:3], s32 offset:304 |
| ; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:300 |
| ; CHECK-NEXT: buffer_load_dword a25, off, s[0:3], s32 offset:296 |
| ; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:292 |
| ; CHECK-NEXT: buffer_load_dword a23, off, s[0:3], s32 offset:288 |
| ; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:284 |
| ; CHECK-NEXT: buffer_load_dword a21, off, s[0:3], s32 offset:280 |
| ; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:276 |
| ; CHECK-NEXT: buffer_load_dword a19, off, s[0:3], s32 offset:272 |
| ; CHECK-NEXT: buffer_load_dword a18, off, s[0:3], s32 offset:268 |
| ; CHECK-NEXT: v_and_b32_e32 v17, 1, v17 |
| ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17 |
| ; CHECK-NEXT: v_and_b32_e32 v16, 1, v16 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v16 |
| ; CHECK-NEXT: s_mov_b64 s[48:49], 0 |
| ; CHECK-NEXT: s_mov_b64 s[52:53], 0 |
| ; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1400 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1404 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:1496 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:1500 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_mov_b32_e32 v30, v27 |
| ; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1408 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1412 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1416 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1420 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_mov_b32_e32 v26, v25 |
| ; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1432 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1436 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_mov_b32_e32 v26, v23 |
| ; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1424 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1428 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_mov_b32_e32 v26, v21 |
| ; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1440 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1444 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:1448 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:1452 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_mov_b32_e32 v20, v19 |
| ; CHECK-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:1456 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:1460 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:1464 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:1468 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_ushort v0, off, s[0:3], s32 offset:136 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:132 |
| ; CHECK-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:128 |
| ; CHECK-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:124 |
| ; CHECK-NEXT: buffer_load_ushort v20, off, s[0:3], s32 offset:120 |
| ; CHECK-NEXT: buffer_load_ushort v21, off, s[0:3], s32 offset:116 |
| ; CHECK-NEXT: buffer_load_ushort v23, off, s[0:3], s32 offset:112 |
| ; CHECK-NEXT: buffer_load_ushort v32, off, s[0:3], s32 offset:108 |
| ; CHECK-NEXT: buffer_load_ushort v25, off, s[0:3], s32 offset:104 |
| ; CHECK-NEXT: buffer_load_ushort v33, off, s[0:3], s32 offset:100 |
| ; CHECK-NEXT: buffer_load_ushort v27, off, s[0:3], s32 offset:96 |
| ; CHECK-NEXT: buffer_load_ushort v34, off, s[0:3], s32 offset:92 |
| ; CHECK-NEXT: buffer_load_ushort v26, off, s[0:3], s32 offset:88 |
| ; CHECK-NEXT: buffer_load_ushort v29, off, s[0:3], s32 offset:84 |
| ; CHECK-NEXT: buffer_load_ushort v31, off, s[0:3], s32 offset:80 |
| ; CHECK-NEXT: buffer_load_ushort v35, off, s[0:3], s32 offset:76 |
| ; CHECK-NEXT: s_waitcnt vmcnt(15) |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v15 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v14 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v13 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v12 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v11 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v10 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v9 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v8 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v7 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v6 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[92:93], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v5 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[94:95], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v4 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(14) |
| ; CHECK-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v2 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v1 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 |
| ; CHECK-NEXT: buffer_load_dword a49, off, s[0:3], s32 offset:392 |
| ; CHECK-NEXT: buffer_load_dword a48, off, s[0:3], s32 offset:388 |
| ; CHECK-NEXT: buffer_load_dword a47, off, s[0:3], s32 offset:384 |
| ; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:380 |
| ; CHECK-NEXT: buffer_load_dword a45, off, s[0:3], s32 offset:376 |
| ; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:372 |
| ; CHECK-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:792 |
| ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:788 |
| ; CHECK-NEXT: s_waitcnt vmcnt(21) |
| ; CHECK-NEXT: v_and_b32_e32 v17, 1, v18 |
| ; CHECK-NEXT: s_waitcnt vmcnt(20) |
| ; CHECK-NEXT: v_and_b32_e32 v18, 1, v19 |
| ; CHECK-NEXT: s_waitcnt vmcnt(19) |
| ; CHECK-NEXT: v_and_b32_e32 v19, 1, v20 |
| ; CHECK-NEXT: s_waitcnt vmcnt(18) |
| ; CHECK-NEXT: v_and_b32_e32 v20, 1, v21 |
| ; CHECK-NEXT: s_waitcnt vmcnt(17) |
| ; CHECK-NEXT: v_and_b32_e32 v21, 1, v23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(16) |
| ; CHECK-NEXT: v_and_b32_e32 v23, 1, v32 |
| ; CHECK-NEXT: s_waitcnt vmcnt(15) |
| ; CHECK-NEXT: v_and_b32_e32 v32, 1, v25 |
| ; CHECK-NEXT: s_waitcnt vmcnt(14) |
| ; CHECK-NEXT: v_and_b32_e32 v25, 1, v33 |
| ; CHECK-NEXT: s_waitcnt vmcnt(13) |
| ; CHECK-NEXT: v_and_b32_e32 v33, 1, v27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(12) |
| ; CHECK-NEXT: v_and_b32_e32 v27, 1, v34 |
| ; CHECK-NEXT: s_waitcnt vmcnt(11) |
| ; CHECK-NEXT: v_and_b32_e32 v34, 1, v26 |
| ; CHECK-NEXT: s_waitcnt vmcnt(10) |
| ; CHECK-NEXT: v_and_b32_e32 v26, 1, v29 |
| ; CHECK-NEXT: s_waitcnt vmcnt(9) |
| ; CHECK-NEXT: v_and_b32_e32 v29, 1, v31 |
| ; CHECK-NEXT: s_waitcnt vmcnt(8) |
| ; CHECK-NEXT: v_and_b32_e32 v31, 1, v35 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[14:15], 1, v32 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v33 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v34 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v31 |
| ; CHECK-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:264 |
| ; CHECK-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:260 |
| ; CHECK-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:256 |
| ; CHECK-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:252 |
| ; CHECK-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:248 |
| ; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:244 |
| ; CHECK-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:240 |
| ; CHECK-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:236 |
| ; CHECK-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:232 |
| ; CHECK-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:228 |
| ; CHECK-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:224 |
| ; CHECK-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:220 |
| ; CHECK-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:216 |
| ; CHECK-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:212 |
| ; CHECK-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:208 |
| ; CHECK-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:204 |
| ; CHECK-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:200 |
| ; CHECK-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:196 |
| ; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:192 |
| ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:188 |
| ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:184 |
| ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:180 |
| ; CHECK-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:176 |
| ; CHECK-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:172 |
| ; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:168 |
| ; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:164 |
| ; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:160 |
| ; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:156 |
| ; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:152 |
| ; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:148 |
| ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:144 |
| ; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:140 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v18 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v20 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v17 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[22:23], 1, v19 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[18:19], 1, v21 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v23 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[20:21], 1, v25 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[16:17], 1, v27 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[12:13], 1, v26 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[8:9], 1, v29 |
| ; CHECK-NEXT: buffer_store_dword a18, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword a19, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a20, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a21, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a22, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a23, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a24, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a25, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a26, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a27, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a28, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a29, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a30, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a31, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a33, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a34, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a35, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a36, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a37, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a38, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a39, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a40, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a41, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a42, off, s[0:3], s32 offset:1232 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a43, off, s[0:3], s32 offset:1236 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_waitcnt vmcnt(60) |
| ; CHECK-NEXT: buffer_store_dword a44, off, s[0:3], s32 offset:1240 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a45, off, s[0:3], s32 offset:1244 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a46, off, s[0:3], s32 offset:1248 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a47, off, s[0:3], s32 offset:1252 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a48, off, s[0:3], s32 offset:1256 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a49, off, s[0:3], s32 offset:1260 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:712 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:708 |
| ; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:704 |
| ; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:700 |
| ; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:696 |
| ; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:692 |
| ; CHECK-NEXT: buffer_load_dword a18, off, s[0:3], s32 offset:688 |
| ; CHECK-NEXT: buffer_load_dword a16, off, s[0:3], s32 offset:684 |
| ; CHECK-NEXT: buffer_load_dword a14, off, s[0:3], s32 offset:680 |
| ; CHECK-NEXT: buffer_load_dword a12, off, s[0:3], s32 offset:676 |
| ; CHECK-NEXT: buffer_load_dword a10, off, s[0:3], s32 offset:672 |
| ; CHECK-NEXT: buffer_load_dword a8, off, s[0:3], s32 offset:668 |
| ; CHECK-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:664 |
| ; CHECK-NEXT: buffer_load_dword a4, off, s[0:3], s32 offset:660 |
| ; CHECK-NEXT: ; kill: def $agpr1 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr3 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr5 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr7 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr9 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr11 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr13 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr15 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr17 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr19 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr21 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr23 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr25 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr27 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr29 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: ; kill: def $agpr31 killed $sgpr4 killed $exec |
| ; CHECK-NEXT: buffer_load_dword a2, off, s[0:3], s32 offset:656 |
| ; CHECK-NEXT: buffer_load_dword a0, off, s[0:3], s32 offset:652 |
| ; CHECK-NEXT: s_waitcnt vmcnt(48) |
| ; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1272 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1276 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:1280 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:1284 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:1288 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:1292 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:1296 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:1300 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:1304 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:1308 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:1312 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:1316 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:1320 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:1324 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:1328 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:1332 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:1336 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:1340 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:1344 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:1348 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:1352 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:1356 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:1360 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:1364 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:1368 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:1372 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:1376 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:1380 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:1384 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:1388 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:1392 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:1396 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; CHECK-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(32) |
| ; CHECK-NEXT: buffer_store_dword a0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword a1, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a2, off, s[0:3], s32 offset:1008 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a3, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a4, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a5, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a6, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a7, off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a8, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a9, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a10, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a11, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a12, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a13, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a14, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a15, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a16, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a17, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a18, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a19, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a20, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a21, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a22, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a23, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a24, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a25, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a26, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a27, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a28, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a29, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a30, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword a31, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:412 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:408 |
| ; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:416 |
| ; CHECK-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:404 |
| ; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:420 |
| ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:400 |
| ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:396 |
| ; CHECK-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:424 |
| ; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:440 |
| ; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:436 |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:444 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1472 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1476 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:432 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:448 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1480 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1484 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:428 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:452 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1488 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1492 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:784 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:780 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:8 |
| ; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:4 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:456 |
| ; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:584 |
| ; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:580 |
| ; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:576 |
| ; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:572 |
| ; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:568 |
| ; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:564 |
| ; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:560 |
| ; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:556 |
| ; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:552 |
| ; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:548 |
| ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:544 |
| ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:540 |
| ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:536 |
| ; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:532 |
| ; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:528 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:524 |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1504 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1508 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1512 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1516 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1520 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1524 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1528 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1532 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1536 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:1540 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:1544 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:1548 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:1552 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:1556 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:1560 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:1564 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:1568 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:1572 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:1576 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:1580 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:1584 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:1588 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:1592 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:1596 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:1600 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:1604 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1608 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1612 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:1616 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:1620 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1624 ; 4-byte Folded Spill |
| ; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1628 ; 4-byte Folded Spill |
| ; CHECK-NEXT: v_cmp_lt_i32_e64 s[38:39], 2, v0 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[50:51], s[38:39] |
| ; CHECK-NEXT: s_xor_b64 s[50:51], exec, s[50:51] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_4 |
| ; CHECK-NEXT: ; %bb.1: ; %LeafBlock46 |
| ; CHECK-NEXT: v_cmp_eq_u32_e64 s[38:39], 3, v0 |
| ; CHECK-NEXT: s_mov_b64 s[54:55], -1 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[52:53], s[38:39] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_3 |
| ; CHECK-NEXT: ; %bb.2: ; %bb2 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a10, v42 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a14, v48 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a4, v56 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a2, v50 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a11, v43 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a8, v40 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a15, v49 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a5, v57 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a3, v51 |
| ; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload |
| ; CHECK-NEXT: v_accvgpr_write_b32 a16, v58 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a17, v59 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v60, a36 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v58, a40 |
| ; CHECK-NEXT: s_xor_b64 s[54:55], exec, -1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(29) |
| ; CHECK-NEXT: v_xor_b32_e32 v28, v28, v18 |
| ; CHECK-NEXT: s_waitcnt vmcnt(28) |
| ; CHECK-NEXT: v_xor_b32_e32 v0, v29, v3 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a9, v0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(26) |
| ; CHECK-NEXT: v_xor_b32_e32 v29, v31, v29 |
| ; CHECK-NEXT: s_waitcnt vmcnt(24) |
| ; CHECK-NEXT: v_xor_b32_e32 v0, v33, v7 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a27, v0 |
| ; CHECK-NEXT: v_xor_b32_e32 v0, v30, v4 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a12, v0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(20) |
| ; CHECK-NEXT: v_xor_b32_e32 v62, v37, v11 |
| ; CHECK-NEXT: v_xor_b32_e32 v0, v32, v6 |
| ; CHECK-NEXT: v_xor_b32_e32 v32, v34, v8 |
| ; CHECK-NEXT: v_xor_b32_e32 v34, v36, v10 |
| ; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a39 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a46 |
| ; CHECK-NEXT: v_xor_b32_e32 v33, v35, v9 |
| ; CHECK-NEXT: s_waitcnt vmcnt(20) |
| ; CHECK-NEXT: v_xor_b32_e32 v35, v39, v13 |
| ; CHECK-NEXT: s_waitcnt vmcnt(15) |
| ; CHECK-NEXT: v_xor_b32_e32 v10, v44, v30 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a44 |
| ; CHECK-NEXT: s_waitcnt vmcnt(11) |
| ; CHECK-NEXT: v_xor_b32_e32 v6, v48, v30 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a42 |
| ; CHECK-NEXT: v_xor_b32_e32 v9, v47, v21 |
| ; CHECK-NEXT: s_waitcnt vmcnt(8) |
| ; CHECK-NEXT: v_xor_b32_e32 v21, v51, v25 |
| ; CHECK-NEXT: v_accvgpr_write_b32 a36, v0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: v_xor_b32_e32 v13, v53, v31 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a29 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: v_xor_b32_e32 v1, v55, v31 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: v_xor_b32_e32 v25, v56, v60 |
| ; CHECK-NEXT: v_xor_b32_e32 v8, v46, v20 |
| ; CHECK-NEXT: v_xor_b32_e32 v20, v50, v30 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a38 |
| ; CHECK-NEXT: v_xor_b32_e32 v0, v54, v58 |
| ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 |
| ; CHECK-NEXT: v_xor_b32_e32 v11, v45, v19 |
| ; CHECK-NEXT: v_xor_b32_e32 v19, v27, v5 |
| ; CHECK-NEXT: v_xor_b32_e32 v4, v52, v30 |
| ; CHECK-NEXT: v_xor_b32_e32 v30, v26, v2 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v5, 17, v25 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 14, v[0:1] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v5 |
| ; CHECK-NEXT: v_and_b32_e32 v5, 0x7fffffff, v13 |
| ; CHECK-NEXT: v_xor_b32_e32 v18, v38, v12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v12, 18, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 13, v[4:5] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v12 |
| ; CHECK-NEXT: v_xor_b32_e32 v7, v49, v23 |
| ; CHECK-NEXT: v_and_b32_e32 v21, 0x7fffffff, v21 |
| ; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7 |
| ; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9 |
| ; CHECK-NEXT: v_and_b32_e32 v11, 0x7fffffff, v11 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a37 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: v_xor_b32_e32 v23, v57, v31 |
| ; CHECK-NEXT: v_xor_b32_e32 v17, v43, v17 |
| ; CHECK-NEXT: v_xor_b32_e32 v16, v42, v16 |
| ; CHECK-NEXT: v_and_b32_e32 v17, 0x7fffffff, v17 |
| ; CHECK-NEXT: v_xor_b32_e32 v15, v41, v15 |
| ; CHECK-NEXT: v_xor_b32_e32 v14, v40, v14 |
| ; CHECK-NEXT: v_and_b32_e32 v15, 0x7fffffff, v15 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v57, a5 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v43, a11 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v49, a15 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v51, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v59, a17 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v56, a4 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v42, a10 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v40, a8 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v48, a14 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v50, a2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v58, a16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: v_add_co_u32_e64 v12, s[38:39], 24, v36 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_addc_co_u32_e64 v13, s[38:39], 0, v37, s[38:39] |
| ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:80 |
| ; CHECK-NEXT: s_movk_i32 s38, 0x7c |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 19, v4 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 12, v[20:21] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 20, v20 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 11, v[6:7] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v4 |
| ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:64 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 22, v8 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 21, v6 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 10, v[8:9] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 9, v[10:11] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v4 |
| ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:48 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 26, v18 |
| ; CHECK-NEXT: v_add_co_u32_e64 v0, s[38:39], s38, v36 |
| ; CHECK-NEXT: v_addc_co_u32_e64 v1, s[38:39], 0, v37, s[38:39] |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v2, 15, v23 |
| ; CHECK-NEXT: s_movk_i32 s38, 0x78 |
| ; CHECK-NEXT: flat_store_short v[0:1], v2 |
| ; CHECK-NEXT: v_add_co_u32_e64 v0, s[38:39], s38, v36 |
| ; CHECK-NEXT: v_alignbit_b32 v2, v23, v25, 15 |
| ; CHECK-NEXT: v_addc_co_u32_e64 v1, s[38:39], 0, v37, s[38:39] |
| ; CHECK-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffffff, v19 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v28 |
| ; CHECK-NEXT: v_or_b32_e32 v31, v0, v1 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 23, v10 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 8, v[16:17] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: flat_store_dwordx2 v[36:37], v[0:1] offset:64 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 24, v16 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 7, v[14:15] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: v_and_b32_e32 v19, 0x7fffffff, v35 |
| ; CHECK-NEXT: flat_store_dwordx2 v[12:13], v[0:1] offset:32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 25, v14 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 6, v[18:19] |
| ; CHECK-NEXT: v_and_b32_e32 v35, 0x7fffffff, v62 |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 5, v[34:35] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v4 |
| ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v6, a36 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, v33 |
| ; CHECK-NEXT: v_and_b32_e32 v33, 0x7fffffff, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 4, v[32:33] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 27, v34 |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: flat_store_dwordx2 v[36:37], v[0:1] offset:32 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v0, a27 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v2, a36 |
| ; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 3, v[2:3] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 28, v32 |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: flat_store_dwordx2 v[12:13], v[0:1] |
| ; CHECK-NEXT: v_mov_b32_e32 v0, v29 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v2, a12 |
| ; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[2:3] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v6 |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: flat_store_dwordx2 v[36:37], v[0:1] offset:16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v0, a9 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v4, a12 |
| ; CHECK-NEXT: v_and_b32_e32 v29, 0x7fffffff, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[32:33], 1, v[28:29] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v4 |
| ; CHECK-NEXT: v_or_b32_e32 v33, v33, v0 |
| ; CHECK-NEXT: flat_store_dwordx4 v[36:37], v[30:33] |
| ; CHECK-NEXT: .LBB0_3: ; %Flow52 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[52:53] |
| ; CHECK-NEXT: s_and_b64 s[52:53], s[54:55], exec |
| ; CHECK-NEXT: ; implicit-def: $vgpr0 |
| ; CHECK-NEXT: .LBB0_4: ; %Flow51 |
| ; CHECK-NEXT: s_andn2_saveexec_b64 s[50:51], s[50:51] |
| ; CHECK-NEXT: ; %bb.5: ; %LeafBlock |
| ; CHECK-NEXT: v_cmp_ne_u32_e64 s[38:39], 2, v0 |
| ; CHECK-NEXT: s_andn2_b64 s[48:49], s[52:53], exec |
| ; CHECK-NEXT: s_and_b64 s[38:39], s[38:39], exec |
| ; CHECK-NEXT: s_or_b64 s[52:53], s[48:49], s[38:39] |
| ; CHECK-NEXT: s_mov_b64 s[48:49], exec |
| ; CHECK-NEXT: ; %bb.6: ; %Flow53 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[50:51] |
| ; CHECK-NEXT: s_mov_b64 s[38:39], exec |
| ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_and_b64 s[50:51], s[38:39], s[52:53] |
| ; CHECK-NEXT: s_mov_b64 exec, s[50:51] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_44 |
| ; CHECK-NEXT: ; %bb.7: ; %bb0 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[50:51], s[46:47] |
| ; CHECK-NEXT: s_xor_b64 s[50:51], exec, s[50:51] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_41 |
| ; CHECK-NEXT: ; %bb.8: ; %bb0b |
| ; CHECK-NEXT: s_mov_b64 s[52:53], exec |
| ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1456 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1460 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:1464 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:1468 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_and_b64 s[36:37], s[52:53], s[36:37] |
| ; CHECK-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr9 |
| ; CHECK-NEXT: s_mov_b64 exec, s[36:37] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_10 |
| ; CHECK-NEXT: ; %bb.9: ; %cond.load |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[0:1] |
| ; CHECK-NEXT: .LBB0_10: ; %else |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[52:53] |
| ; CHECK-NEXT: s_mov_b64 s[36:37], exec |
| ; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:1448 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:1452 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:1440 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:1444 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:1424 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:1428 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:1432 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:1436 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1416 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1420 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:1408 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:1412 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1496 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1500 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:1400 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:1404 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1264 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1268 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_and_b64 s[34:35], s[36:37], s[34:35] |
| ; CHECK-NEXT: s_mov_b64 exec, s[34:35] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_12 |
| ; CHECK-NEXT: ; %bb.11: ; %cond.load1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[10:11], v[0:1] offset:8 |
| ; CHECK-NEXT: .LBB0_12: ; %else2 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[36:37] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[34:35], s[30:31] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_14 |
| ; CHECK-NEXT: ; %bb.13: ; %cond.load4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[12:13], v[0:1] offset:16 |
| ; CHECK-NEXT: .LBB0_14: ; %else5 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[34:35] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[30:31], s[94:95] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_16 |
| ; CHECK-NEXT: ; %bb.15: ; %cond.load7 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[14:15], v[0:1] offset:24 |
| ; CHECK-NEXT: .LBB0_16: ; %else8 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[30:31] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[94:95], s[92:93] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_18 |
| ; CHECK-NEXT: ; %bb.17: ; %cond.load10 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[22:23], v[0:1] offset:32 |
| ; CHECK-NEXT: .LBB0_18: ; %else11 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[94:95] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[92:93], s[90:91] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_20 |
| ; CHECK-NEXT: ; %bb.19: ; %cond.load13 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[16:17], v[0:1] offset:40 |
| ; CHECK-NEXT: .LBB0_20: ; %else14 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[92:93] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[90:91], s[88:89] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_22 |
| ; CHECK-NEXT: ; %bb.21: ; %cond.load16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[24:25], v[0:1] offset:48 |
| ; CHECK-NEXT: .LBB0_22: ; %else17 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[90:91] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[88:89], s[78:79] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_24 |
| ; CHECK-NEXT: ; %bb.23: ; %cond.load19 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[18:19], v[0:1] offset:56 |
| ; CHECK-NEXT: .LBB0_24: ; %else20 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[88:89] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[78:79], s[76:77] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_26 |
| ; CHECK-NEXT: ; %bb.25: ; %cond.load22 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[26:27], v[0:1] offset:64 |
| ; CHECK-NEXT: .LBB0_26: ; %else23 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[78:79] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[76:77], s[74:75] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_28 |
| ; CHECK-NEXT: ; %bb.27: ; %cond.load25 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[20:21], v[0:1] offset:72 |
| ; CHECK-NEXT: .LBB0_28: ; %else26 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[76:77] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[74:75], s[72:73] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_30 |
| ; CHECK-NEXT: ; %bb.29: ; %cond.load28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[28:29], v[0:1] offset:80 |
| ; CHECK-NEXT: .LBB0_30: ; %else29 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[74:75] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[72:73], s[62:63] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_32 |
| ; CHECK-NEXT: ; %bb.31: ; %cond.load31 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[32:33], v[0:1] offset:88 |
| ; CHECK-NEXT: .LBB0_32: ; %else32 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[72:73] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[62:63], s[60:61] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_34 |
| ; CHECK-NEXT: ; %bb.33: ; %cond.load34 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[30:31], v[0:1] offset:96 |
| ; CHECK-NEXT: .LBB0_34: ; %else35 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[62:63] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[60:61], s[58:59] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_36 |
| ; CHECK-NEXT: ; %bb.35: ; %cond.load37 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:104 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill |
| ; CHECK-NEXT: s_nop 0 |
| ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill |
| ; CHECK-NEXT: .LBB0_36: ; %else38 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[60:61] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[58:59], s[56:57] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_38 |
| ; CHECK-NEXT: ; %bb.37: ; %cond.load40 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 a[26:27], v[0:1] offset:112 |
| ; CHECK-NEXT: .LBB0_38: ; %else41 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[58:59] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[56:57], vcc |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_40 |
| ; CHECK-NEXT: ; %bb.39: ; %cond.load43 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx2 a[28:29], v[0:1] offset:120 |
| ; CHECK-NEXT: .LBB0_40: ; %else44 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[56:57] |
| ; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_accvgpr_read_b32 v4, a28 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v5, a29 |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v2, 15, v5 |
| ; CHECK-NEXT: v_and_b32_e32 v31, 0x7fffffff, v31 |
| ; CHECK-NEXT: v_and_b32_e32 v33, 0x7fffffff, v33 |
| ; CHECK-NEXT: v_and_b32_e32 v29, 0x7fffffff, v29 |
| ; CHECK-NEXT: v_and_b32_e32 v21, 0x7fffffff, v21 |
| ; CHECK-NEXT: v_and_b32_e32 v27, 0x7fffffff, v27 |
| ; CHECK-NEXT: v_and_b32_e32 v19, 0x7fffffff, v19 |
| ; CHECK-NEXT: v_and_b32_e32 v25, 0x7fffffff, v25 |
| ; CHECK-NEXT: v_and_b32_e32 v17, 0x7fffffff, v17 |
| ; CHECK-NEXT: v_and_b32_e32 v23, 0x7fffffff, v23 |
| ; CHECK-NEXT: v_and_b32_e32 v15, 0x7fffffff, v15 |
| ; CHECK-NEXT: v_and_b32_e32 v13, 0x7fffffff, v13 |
| ; CHECK-NEXT: v_and_b32_e32 v11, 0x7fffffff, v11 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x7c, v34 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v35, vcc |
| ; CHECK-NEXT: flat_store_short v[0:1], v2 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x78, v34 |
| ; CHECK-NEXT: v_alignbit_b32 v2, v5, v4, 15 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v35, vcc |
| ; CHECK-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v2, a26 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v3, a27 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 31, v10 |
| ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v9 |
| ; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3 |
| ; CHECK-NEXT: v_or_b32_e32 v9, v1, v0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 17, v4 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 14, v[2:3] |
| ; CHECK-NEXT: v_and_b32_e32 v37, 0x7fffffff, v37 |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v0 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 24, v34 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 18, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 13, v[36:37] |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v35, vcc |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:80 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v30 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 19, v36 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 12, v[30:31] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 11, v[32:33] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:64 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v28 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v32 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 10, v[28:29] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 9, v[20:21] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:48 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 26, v24 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v20 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 8, v[26:27] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[34:35], v[2:3] offset:64 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 24, v26 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 7, v[18:19] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 25, v18 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 6, v[24:25] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 5, v[16:17] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16 |
| ; CHECK-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 27, v16 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 4, v[22:23] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[34:35], v[2:3] offset:32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v22 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 3, v[14:15] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v14 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[12:13] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: flat_store_dwordx2 v[34:35], v[0:1] offset:16 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v12 |
| ; CHECK-NEXT: v_or_b32_e32 v11, v11, v0 |
| ; CHECK-NEXT: flat_store_dwordx4 v[34:35], v[8:11] |
| ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| ; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| ; CHECK-NEXT: .LBB0_41: ; %Flow |
| ; CHECK-NEXT: s_andn2_saveexec_b64 s[56:57], s[50:51] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_43 |
| ; CHECK-NEXT: ; %bb.42: ; %bb0a |
| ; CHECK-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_and_b32_e32 v11, 0x7fffffff, v11 |
| ; CHECK-NEXT: v_and_b32_e32 v13, 0x7fffffff, v13 |
| ; CHECK-NEXT: v_and_b32_e32 v15, 0x7fffffff, v15 |
| ; CHECK-NEXT: v_and_b32_e32 v17, 0x7fffffff, v17 |
| ; CHECK-NEXT: v_and_b32_e32 v19, 0x7fffffff, v19 |
| ; CHECK-NEXT: v_and_b32_e32 v21, 0x7fffffff, v21 |
| ; CHECK-NEXT: v_and_b32_e32 v23, 0x7fffffff, v23 |
| ; CHECK-NEXT: v_and_b32_e32 v25, 0x7fffffff, v25 |
| ; CHECK-NEXT: v_and_b32_e32 v27, 0x7fffffff, v27 |
| ; CHECK-NEXT: v_and_b32_e32 v29, 0x7fffffff, v29 |
| ; CHECK-NEXT: v_and_b32_e32 v31, 0x7fffffff, v31 |
| ; CHECK-NEXT: v_and_b32_e32 v33, 0x7fffffff, v33 |
| ; CHECK-NEXT: v_and_b32_e32 v35, 0x7fffffff, v35 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 18, v36 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x7c, v52 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v53, vcc |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v2, 15, v39 |
| ; CHECK-NEXT: flat_store_short v[0:1], v2 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x78, v52 |
| ; CHECK-NEXT: v_alignbit_b32 v2, v39, v38, 15 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v53, vcc |
| ; CHECK-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 31, v10 |
| ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v9 |
| ; CHECK-NEXT: v_and_b32_e32 v37, 0x7fffffff, v37 |
| ; CHECK-NEXT: v_or_b32_e32 v9, v1, v0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 17, v38 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 14, v[36:37] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v0 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 24, v52 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 13, v[34:35] |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v53, vcc |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:80 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 19, v34 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 12, v[32:33] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 11, v[30:31] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:64 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v28 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v30 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 10, v[28:29] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 9, v[26:27] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:48 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 26, v20 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v26 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 8, v[24:25] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[52:53], v[2:3] offset:64 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 24, v24 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 7, v[22:23] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 25, v22 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 6, v[20:21] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 5, v[18:19] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16 |
| ; CHECK-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 27, v18 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 4, v[16:17] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[52:53], v[2:3] offset:32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v16 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 3, v[14:15] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v14 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[12:13] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: flat_store_dwordx2 v[52:53], v[0:1] offset:16 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v12 |
| ; CHECK-NEXT: v_or_b32_e32 v11, v11, v0 |
| ; CHECK-NEXT: flat_store_dwordx4 v[52:53], v[8:11] |
| ; CHECK-NEXT: .LBB0_43: ; %Flow48 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[56:57] |
| ; CHECK-NEXT: s_andn2_b64 s[48:49], s[48:49], exec |
| ; CHECK-NEXT: .LBB0_44: ; %Flow54 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[38:39] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[56:57], s[48:49] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_49 |
| ; CHECK-NEXT: ; %bb.45: ; %bb1 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[58:59], s[46:47] |
| ; CHECK-NEXT: s_xor_b64 s[46:47], exec, s[58:59] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_47 |
| ; CHECK-NEXT: ; %bb.46: ; %bb1b |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a31 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a30 |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v33, 15, v31 |
| ; CHECK-NEXT: v_alignbit_b32 v36, v31, v32, 15 |
| ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1504 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:1508 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:1512 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:1516 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:1520 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:1524 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:1528 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:1532 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1536 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1540 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:1544 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:1548 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:1552 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:1556 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:1560 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:1564 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:1568 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:1572 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:1576 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:1580 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:1584 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:1588 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:1592 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:1596 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:1600 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:1604 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1608 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1612 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1616 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1620 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1624 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1628 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:1488 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:1492 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1480 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1484 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 17, v32 |
| ; CHECK-NEXT: v_and_b32_e32 v59, 0x7fffffff, v59 |
| ; CHECK-NEXT: v_and_b32_e32 v51, 0x7fffffff, v51 |
| ; CHECK-NEXT: v_and_b32_e32 v57, 0x7fffffff, v57 |
| ; CHECK-NEXT: v_and_b32_e32 v49, 0x7fffffff, v49 |
| ; CHECK-NEXT: v_and_b32_e32 v43, 0x7fffffff, v43 |
| ; CHECK-NEXT: v_and_b32_e32 v4, 0x7fffffff, v1 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x7c, v10 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v11, vcc |
| ; CHECK-NEXT: flat_store_short v[0:1], v33 |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x78, v10 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v11, vcc |
| ; CHECK-NEXT: flat_store_dword v[0:1], v36 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 31, v42 |
| ; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3 |
| ; CHECK-NEXT: v_or_b32_e32 v41, v4, v0 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 14, v[2:3] |
| ; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9 |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v6 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 18, v2 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 13, v[8:9] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:1472 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:1476 ; 4-byte Folded Reload |
| ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 24, v10 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v11, vcc |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:80 |
| ; CHECK-NEXT: v_lshrrev_b64 v[42:43], 1, v[42:43] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 19, v8 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v8, a32 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v9, a33 |
| ; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 12, v[6:7] |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v6 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 11, v[8:9] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v6, a34 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:64 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v7, a35 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v8 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v8, a24 |
| ; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v9, a25 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 10, v[6:7] |
| ; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9 |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v6 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 9, v[8:9] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:48 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v6, a22 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v8 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 8, v[58:59] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[10:11], v[2:3] offset:64 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 24, v58 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 7, v[50:51] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v7, a23 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v8, a20 |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v9, a21 |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 25, v50 |
| ; CHECK-NEXT: v_lshrrev_b64 v[4:5], 6, v[6:7] |
| ; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9 |
| ; CHECK-NEXT: v_or_b32_e32 v5, v5, v2 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v6, 26, v6 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 5, v[8:9] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v6 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v6, a6 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v7, a7 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 27, v8 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 4, v[56:57] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7 |
| ; CHECK-NEXT: flat_store_dwordx2 v[10:11], v[2:3] offset:32 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v56 |
| ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 3, v[6:7] |
| ; CHECK-NEXT: v_or_b32_e32 v3, v3, v4 |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v6 |
| ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[48:49] |
| ; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 |
| ; CHECK-NEXT: flat_store_dwordx2 v[10:11], v[0:1] offset:16 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v48 |
| ; CHECK-NEXT: v_or_b32_e32 v43, v43, v0 |
| ; CHECK-NEXT: flat_store_dwordx4 v[10:11], v[40:43] |
| ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| ; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 |
| ; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1 |
| ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| ; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| ; CHECK-NEXT: .LBB0_47: ; %Flow49 |
| ; CHECK-NEXT: s_andn2_saveexec_b64 s[46:47], s[46:47] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_49 |
| ; CHECK-NEXT: ; %bb.48: ; %bb1a |
| ; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a33, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a35, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a37, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a39, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a41, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:1176 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a43, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a45, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a47, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a48, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a49, off, s[0:3], s32 offset:1204 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a50, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a51, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a52, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a53, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a54, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a55, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a56, off, s[0:3], s32 offset:1232 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a57, off, s[0:3], s32 offset:1236 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a58, off, s[0:3], s32 offset:1240 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a59, off, s[0:3], s32 offset:1244 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a60, off, s[0:3], s32 offset:1248 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a61, off, s[0:3], s32 offset:1252 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a62, off, s[0:3], s32 offset:1256 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a63, off, s[0:3], s32 offset:1260 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a0, off, s[0:3], s32 offset:1272 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a1, off, s[0:3], s32 offset:1276 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a2, off, s[0:3], s32 offset:1280 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a3, off, s[0:3], s32 offset:1284 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a4, off, s[0:3], s32 offset:1288 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a5, off, s[0:3], s32 offset:1292 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:1296 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a7, off, s[0:3], s32 offset:1300 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a8, off, s[0:3], s32 offset:1304 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a9, off, s[0:3], s32 offset:1308 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a10, off, s[0:3], s32 offset:1312 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a11, off, s[0:3], s32 offset:1316 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a12, off, s[0:3], s32 offset:1320 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a13, off, s[0:3], s32 offset:1324 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a14, off, s[0:3], s32 offset:1328 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a15, off, s[0:3], s32 offset:1332 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a16, off, s[0:3], s32 offset:1336 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a17, off, s[0:3], s32 offset:1340 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a18, off, s[0:3], s32 offset:1344 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a19, off, s[0:3], s32 offset:1348 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:1352 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a21, off, s[0:3], s32 offset:1356 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:1360 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a23, off, s[0:3], s32 offset:1364 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:1368 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a25, off, s[0:3], s32 offset:1372 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:1376 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a27, off, s[0:3], s32 offset:1380 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:1384 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a29, off, s[0:3], s32 offset:1388 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:1392 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a31, off, s[0:3], s32 offset:1396 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a30 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a31 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a32 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a28 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a33 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a34 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a35 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a36 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v36, a38 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a28 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a60 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a62 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a20 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a21 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a22 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a23 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a24 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a25 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a26 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a27 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a29 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v37, a39 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v38, a40 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v39, a41 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v59, a61 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v36, v28, v32, s[44:45] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a31 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a63 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a30 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a62 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v37, v59, v29, s[44:45] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v39, v31, v33, s[40:41] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v38, v30, v34, s[40:41] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a27 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v16, a16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v17, a17 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v18, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v19, a19 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a23 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a22 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a21 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a20 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a19 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a17 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v16, a48 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v49, a51 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a25 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a24 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a52 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a53 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a54 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a55 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a56 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a57 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a58 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a59 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v49, v25, v29, s[42:43] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a24 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v17, a49 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v18, a50 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v19, a51 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a23 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a22 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a20 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a19 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a17 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a30 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v16, a48 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a58 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v48, a50 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v50, a52 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v51, a53 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a26 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a21 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v17, a49 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v18, a50 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v19, a51 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a52 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a53 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a54 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a55 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a56 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a27 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a59 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a26 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v48, v24, v28, s[42:43] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v51, v27, v33, s[26:27] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v50, v26, v34, s[26:27] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a23 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v16, a48 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v53, a55 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a21 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a52 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a53 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a54 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a55 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v53, v21, v29, s[28:29] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a20 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v17, a49 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v18, a50 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v19, a51 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a26 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a19 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v16, a48 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a22 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a20 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v18, a50 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v19, a51 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a23 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a54 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v52, a54 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v54, a56 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v55, a57 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a52 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a22 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a55 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a19 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a22 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v18, a50 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v40, a42 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v41, a43 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v42, a44 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v43, a45 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a19 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a17 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a17 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v17, a49 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v52, v20, v24, s[28:29] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v55, v23, v33, s[22:23] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a17 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a16 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v54, v22, v34, s[22:23] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a16 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v16, a48 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a19 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v19, a51 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v1, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v2, a2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v3, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v4, a4 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v5, a5 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v6, a6 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v7, a7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v8, a8 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v9, a9 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v10, a10 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v11, a11 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v12, a12 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v13, a13 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v14, a14 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v15, a15 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v41, v17, v29, s[24:25] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v40, v16, v20, s[24:25] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v43, v19, v33, s[18:19] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v42, v18, v34, s[18:19] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a15 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v0, a32 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a14 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v12, a44 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v13, a45 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a13 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a12 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a11 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a10 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a9 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a8 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a6 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a5 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a4 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v17, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v16, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a12 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v17, v13, v29, s[20:21] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v16, v12, v30, s[20:21] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a11 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a18 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v9, a41 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a10 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a9 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a8 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a6 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a5 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a4 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v21, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a15 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v21, v9, v29, s[16:17] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a8 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v8, a40 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v15, a47 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v19, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a14 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v20, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a6 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a5 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a4 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a2 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v19, v15, v33, s[14:15] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a11 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v20, v8, v30, s[16:17] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v5, a37 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v11, a43 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v23, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a10 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a6 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a5 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a4 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v25, a1 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v23, v11, v33, s[10:11] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v25, v5, v29, s[12:13] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a4 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a7 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v4, a36 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v7, a39 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v24, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v27, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a6 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v24, v4, v30, s[12:13] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v27, v7, v33, s[6:7] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v33, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a3 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v30, a2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v29, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v1, a33 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v28, a0 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v29, v1, v29, s[4:5] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v28, v0, v28, s[4:5] |
| ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload |
| ; CHECK-NEXT: v_accvgpr_read_b32 v14, a46 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v18, a2 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a14 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v2, a34 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v3, a35 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v10, a42 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v18, v14, v34, s[14:15] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v22, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a10 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v31, a1 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v6, a38 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v22, v10, v34, s[10:11] |
| ; CHECK-NEXT: v_accvgpr_read_b32 v26, a0 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v34, a6 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v31, v3, v33, s[8:9] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v30, v2, v30, s[8:9] |
| ; CHECK-NEXT: v_cndmask_b32_e64 v26, v6, v34, s[6:7] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[28:31] |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[24:27] offset:16 |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[20:23] offset:32 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v35, a37 |
| ; CHECK-NEXT: v_add_co_u32_e32 v20, vcc, 48, v0 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v21, vcc, 0, v1, vcc |
| ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, 0x70, v0 |
| ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc |
| ; CHECK-NEXT: v_accvgpr_read_b32 v44, a46 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v45, a47 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v46, a48 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v47, a49 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v56, a58 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v57, a59 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v58, a60 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v60, a62 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v61, a63 |
| ; CHECK-NEXT: v_accvgpr_read_b32 v32, a2 |
| ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[16:19] |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[40:43] offset:64 |
| ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[52:55] offset:32 |
| ; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[48:51] offset:48 |
| ; CHECK-NEXT: flat_store_dwordx4 v[4:5], v[36:39] |
| ; CHECK-NEXT: .LBB0_49: ; %UnifiedReturnBlock |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[56:57] |
| ; CHECK-NEXT: buffer_load_dword a63, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a62, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a61, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a60, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a59, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a58, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a57, off, s[0:3], s32 offset:820 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a56, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a55, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a54, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a53, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a52, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a51, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a50, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a49, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a48, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a47, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a45, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a43, off, s[0:3], s32 offset:876 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a41, off, s[0:3], s32 offset:884 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a39, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a37, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a35, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a33, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:968 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload |
| ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload |
| ; CHECK-NEXT: v_readlane_b32 s30, v63, 14 |
| ; CHECK-NEXT: v_readlane_b32 s31, v63, 15 |
| ; CHECK-NEXT: v_readlane_b32 s55, v63, 13 |
| ; CHECK-NEXT: v_readlane_b32 s54, v63, 12 |
| ; CHECK-NEXT: v_readlane_b32 s53, v63, 11 |
| ; CHECK-NEXT: v_readlane_b32 s52, v63, 10 |
| ; CHECK-NEXT: v_readlane_b32 s51, v63, 9 |
| ; CHECK-NEXT: v_readlane_b32 s50, v63, 8 |
| ; CHECK-NEXT: v_readlane_b32 s49, v63, 7 |
| ; CHECK-NEXT: v_readlane_b32 s48, v63, 6 |
| ; CHECK-NEXT: v_readlane_b32 s39, v63, 5 |
| ; CHECK-NEXT: v_readlane_b32 s38, v63, 4 |
| ; CHECK-NEXT: v_readlane_b32 s37, v63, 3 |
| ; CHECK-NEXT: v_readlane_b32 s36, v63, 2 |
| ; CHECK-NEXT: v_readlane_b32 s35, v63, 1 |
| ; CHECK-NEXT: v_readlane_b32 s34, v63, 0 |
| ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 |
| ; CHECK-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:1632 ; 4-byte Folded Reload |
| ; CHECK-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| <16 x double> %da, <16 x double> %db, |
| <16 x i63> %v, <16 x i63> %w, <16 x i63> %a, |
| ptr %sink, i32 %disc, i1 %c) #0 { |
| entry: |
| switch i32 %disc, label %bb0 [ |
| i32 2, label %bb1 |
| i32 3, label %bb2 |
| ] |
| bb0: |
| br i1 %c, label %bb0a, label %bb0b |
| bb0a: |
| store <16 x i63> %a, ptr %sink |
| ret void |
| bb0b: |
| %ld = call <16 x i63> @llvm.masked.load.v16i63.p0(ptr %p, <16 x i1> %m, <16 x i63> %pt) |
| store <16 x i63> %ld, ptr %sink |
| ret void |
| bb1: |
| br i1 %c, label %bb1a, label %bb1b |
| bb1a: |
| %sel = select <16 x i1> %sc, <16 x double> %da, <16 x double> %db |
| store <16 x double> %sel, ptr %sink |
| ret void |
| bb1b: |
| store <16 x i63> %v, ptr %sink |
| ret void |
| bb2: |
| %x = xor <16 x i63> %a, %w |
| store <16 x i63> %x, ptr %sink |
| ret void |
| } |
| |
| ; Attribute group #0 is applied to @f (the `#0` on its signature) and marks it nounwind. |
| attributes #0 = { nounwind } |