; blob: f08b68f9b8c382a80aeb266c4be58d310b2ccefe [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
; Regression test ensuring that empty subranges are not prematurely eliminated.
; Declaration of the masked-load intrinsic for a 16-lane vector of i63.
; As declared here it takes a pointer, a <16 x i1> per-lane mask, and a
; <16 x i63> vector (presumably the pass-through value used for masked-off
; lanes -- confirm against the LangRef entry for llvm.masked.load), and
; returns a <16 x i63> result.
declare <16 x i63> @llvm.masked.load.v16i63.p0(ptr, <16 x i1>, <16 x i63>)
define void @f(ptr %p, <16 x i1> %m, <16 x i63> %pt, <16 x i1> %sc,
; CHECK-LABEL: f:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:1632 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:956 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:952 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a33, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a34, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a35, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a36, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a37, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a38, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a39, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a40, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a41, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a42, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a43, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a44, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a45, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a46, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a47, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a48, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a49, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a50, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a51, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a52, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a53, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a54, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a55, off, s[0:3], s32 offset:828 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a56, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a57, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a58, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a59, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a60, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a61, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a62, off, s[0:3], s32 offset:800 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a63, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v63, s34, 0
; CHECK-NEXT: v_writelane_b32 v63, s35, 1
; CHECK-NEXT: v_writelane_b32 v63, s36, 2
; CHECK-NEXT: v_writelane_b32 v63, s37, 3
; CHECK-NEXT: v_writelane_b32 v63, s38, 4
; CHECK-NEXT: v_writelane_b32 v63, s39, 5
; CHECK-NEXT: v_writelane_b32 v63, s48, 6
; CHECK-NEXT: v_writelane_b32 v63, s49, 7
; CHECK-NEXT: v_writelane_b32 v63, s50, 8
; CHECK-NEXT: v_writelane_b32 v63, s51, 9
; CHECK-NEXT: v_writelane_b32 v63, s52, 10
; CHECK-NEXT: v_writelane_b32 v63, s53, 11
; CHECK-NEXT: v_writelane_b32 v63, s54, 12
; CHECK-NEXT: v_writelane_b32 v63, s55, 13
; CHECK-NEXT: v_writelane_b32 v63, s30, 14
; CHECK-NEXT: v_writelane_b32 v63, s31, 15
; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1264 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1268 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v30, v29
; CHECK-NEXT: buffer_load_dword a43, off, s[0:3], s32 offset:368
; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:364
; CHECK-NEXT: buffer_load_dword a41, off, s[0:3], s32 offset:360
; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:356
; CHECK-NEXT: buffer_load_dword a39, off, s[0:3], s32 offset:352
; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:348
; CHECK-NEXT: buffer_load_dword a37, off, s[0:3], s32 offset:344
; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:340
; CHECK-NEXT: buffer_load_dword a35, off, s[0:3], s32 offset:336
; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:332
; CHECK-NEXT: buffer_load_dword a33, off, s[0:3], s32 offset:328
; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:324
; CHECK-NEXT: buffer_load_dword a31, off, s[0:3], s32 offset:320
; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:316
; CHECK-NEXT: buffer_load_dword a29, off, s[0:3], s32 offset:312
; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:308
; CHECK-NEXT: buffer_load_dword a27, off, s[0:3], s32 offset:304
; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:300
; CHECK-NEXT: buffer_load_dword a25, off, s[0:3], s32 offset:296
; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:292
; CHECK-NEXT: buffer_load_dword a23, off, s[0:3], s32 offset:288
; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:284
; CHECK-NEXT: buffer_load_dword a21, off, s[0:3], s32 offset:280
; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:276
; CHECK-NEXT: buffer_load_dword a19, off, s[0:3], s32 offset:272
; CHECK-NEXT: buffer_load_dword a18, off, s[0:3], s32 offset:268
; CHECK-NEXT: v_and_b32_e32 v17, 1, v17
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17
; CHECK-NEXT: v_and_b32_e32 v16, 1, v16
; CHECK-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v16
; CHECK-NEXT: s_mov_b64 s[48:49], 0
; CHECK-NEXT: s_mov_b64 s[52:53], 0
; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1400 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1404 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:1496 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:1500 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v30, v27
; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1408 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1412 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1416 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1420 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v26, v25
; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1432 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1436 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v26, v23
; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1424 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1428 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v26, v21
; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1440 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1444 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:1448 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:1452 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v20, v19
; CHECK-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:1456 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:1460 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:1464 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:1468 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_ushort v0, off, s[0:3], s32 offset:136
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:132
; CHECK-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:128
; CHECK-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:124
; CHECK-NEXT: buffer_load_ushort v20, off, s[0:3], s32 offset:120
; CHECK-NEXT: buffer_load_ushort v21, off, s[0:3], s32 offset:116
; CHECK-NEXT: buffer_load_ushort v23, off, s[0:3], s32 offset:112
; CHECK-NEXT: buffer_load_ushort v32, off, s[0:3], s32 offset:108
; CHECK-NEXT: buffer_load_ushort v25, off, s[0:3], s32 offset:104
; CHECK-NEXT: buffer_load_ushort v33, off, s[0:3], s32 offset:100
; CHECK-NEXT: buffer_load_ushort v27, off, s[0:3], s32 offset:96
; CHECK-NEXT: buffer_load_ushort v34, off, s[0:3], s32 offset:92
; CHECK-NEXT: buffer_load_ushort v26, off, s[0:3], s32 offset:88
; CHECK-NEXT: buffer_load_ushort v29, off, s[0:3], s32 offset:84
; CHECK-NEXT: buffer_load_ushort v31, off, s[0:3], s32 offset:80
; CHECK-NEXT: buffer_load_ushort v35, off, s[0:3], s32 offset:76
; CHECK-NEXT: s_waitcnt vmcnt(15)
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v15
; CHECK-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v14
; CHECK-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v13
; CHECK-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v12
; CHECK-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v11
; CHECK-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v10
; CHECK-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v9
; CHECK-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v8
; CHECK-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v7
; CHECK-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v6
; CHECK-NEXT: v_cmp_eq_u32_e64 s[92:93], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v5
; CHECK-NEXT: v_cmp_eq_u32_e64 s[94:95], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v4
; CHECK-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v3
; CHECK-NEXT: s_waitcnt vmcnt(14)
; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v2
; CHECK-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v1
; CHECK-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0
; CHECK-NEXT: buffer_load_dword a49, off, s[0:3], s32 offset:392
; CHECK-NEXT: buffer_load_dword a48, off, s[0:3], s32 offset:388
; CHECK-NEXT: buffer_load_dword a47, off, s[0:3], s32 offset:384
; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:380
; CHECK-NEXT: buffer_load_dword a45, off, s[0:3], s32 offset:376
; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:372
; CHECK-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:792
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:788
; CHECK-NEXT: s_waitcnt vmcnt(21)
; CHECK-NEXT: v_and_b32_e32 v17, 1, v18
; CHECK-NEXT: s_waitcnt vmcnt(20)
; CHECK-NEXT: v_and_b32_e32 v18, 1, v19
; CHECK-NEXT: s_waitcnt vmcnt(19)
; CHECK-NEXT: v_and_b32_e32 v19, 1, v20
; CHECK-NEXT: s_waitcnt vmcnt(18)
; CHECK-NEXT: v_and_b32_e32 v20, 1, v21
; CHECK-NEXT: s_waitcnt vmcnt(17)
; CHECK-NEXT: v_and_b32_e32 v21, 1, v23
; CHECK-NEXT: s_waitcnt vmcnt(16)
; CHECK-NEXT: v_and_b32_e32 v23, 1, v32
; CHECK-NEXT: s_waitcnt vmcnt(15)
; CHECK-NEXT: v_and_b32_e32 v32, 1, v25
; CHECK-NEXT: s_waitcnt vmcnt(14)
; CHECK-NEXT: v_and_b32_e32 v25, 1, v33
; CHECK-NEXT: s_waitcnt vmcnt(13)
; CHECK-NEXT: v_and_b32_e32 v33, 1, v27
; CHECK-NEXT: s_waitcnt vmcnt(12)
; CHECK-NEXT: v_and_b32_e32 v27, 1, v34
; CHECK-NEXT: s_waitcnt vmcnt(11)
; CHECK-NEXT: v_and_b32_e32 v34, 1, v26
; CHECK-NEXT: s_waitcnt vmcnt(10)
; CHECK-NEXT: v_and_b32_e32 v26, 1, v29
; CHECK-NEXT: s_waitcnt vmcnt(9)
; CHECK-NEXT: v_and_b32_e32 v29, 1, v31
; CHECK-NEXT: s_waitcnt vmcnt(8)
; CHECK-NEXT: v_and_b32_e32 v31, 1, v35
; CHECK-NEXT: v_cmp_eq_u32_e64 s[14:15], 1, v32
; CHECK-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v33
; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v34
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v31
; CHECK-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:264
; CHECK-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:260
; CHECK-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:256
; CHECK-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:252
; CHECK-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:248
; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:244
; CHECK-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:240
; CHECK-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:236
; CHECK-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:232
; CHECK-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:228
; CHECK-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:224
; CHECK-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:220
; CHECK-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:216
; CHECK-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:212
; CHECK-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:208
; CHECK-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:204
; CHECK-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:200
; CHECK-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:196
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:192
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:188
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:184
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:180
; CHECK-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:176
; CHECK-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:172
; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:168
; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:164
; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:160
; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:156
; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:152
; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:148
; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:144
; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:140
; CHECK-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v18
; CHECK-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v20
; CHECK-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v17
; CHECK-NEXT: v_cmp_eq_u32_e64 s[22:23], 1, v19
; CHECK-NEXT: v_cmp_eq_u32_e64 s[18:19], 1, v21
; CHECK-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v23
; CHECK-NEXT: v_cmp_eq_u32_e64 s[20:21], 1, v25
; CHECK-NEXT: v_cmp_eq_u32_e64 s[16:17], 1, v27
; CHECK-NEXT: v_cmp_eq_u32_e64 s[12:13], 1, v26
; CHECK-NEXT: v_cmp_eq_u32_e64 s[8:9], 1, v29
; CHECK-NEXT: buffer_store_dword a18, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword a19, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a20, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a21, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a22, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a23, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a24, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a25, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a26, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a27, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a28, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a29, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a30, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a31, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a33, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a34, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a35, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a36, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a37, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a38, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a39, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a40, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a41, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a42, off, s[0:3], s32 offset:1232 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a43, off, s[0:3], s32 offset:1236 ; 4-byte Folded Spill
; CHECK-NEXT: s_waitcnt vmcnt(60)
; CHECK-NEXT: buffer_store_dword a44, off, s[0:3], s32 offset:1240 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a45, off, s[0:3], s32 offset:1244 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a46, off, s[0:3], s32 offset:1248 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a47, off, s[0:3], s32 offset:1252 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a48, off, s[0:3], s32 offset:1256 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a49, off, s[0:3], s32 offset:1260 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:712
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:708
; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:704
; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:700
; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:696
; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:692
; CHECK-NEXT: buffer_load_dword a18, off, s[0:3], s32 offset:688
; CHECK-NEXT: buffer_load_dword a16, off, s[0:3], s32 offset:684
; CHECK-NEXT: buffer_load_dword a14, off, s[0:3], s32 offset:680
; CHECK-NEXT: buffer_load_dword a12, off, s[0:3], s32 offset:676
; CHECK-NEXT: buffer_load_dword a10, off, s[0:3], s32 offset:672
; CHECK-NEXT: buffer_load_dword a8, off, s[0:3], s32 offset:668
; CHECK-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:664
; CHECK-NEXT: buffer_load_dword a4, off, s[0:3], s32 offset:660
; CHECK-NEXT: ; kill: def $agpr1 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr3 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr5 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr7 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr9 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr11 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr13 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr15 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr17 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr19 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr21 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr23 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr25 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr27 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr29 killed $sgpr4 killed $exec
; CHECK-NEXT: ; kill: def $agpr31 killed $sgpr4 killed $exec
; CHECK-NEXT: buffer_load_dword a2, off, s[0:3], s32 offset:656
; CHECK-NEXT: buffer_load_dword a0, off, s[0:3], s32 offset:652
; CHECK-NEXT: s_waitcnt vmcnt(48)
; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1272 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1276 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:1280 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:1284 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:1288 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:1292 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:1296 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:1300 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:1304 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:1308 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:1312 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:1316 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:1320 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:1324 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:1328 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:1332 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:1336 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:1340 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:1344 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:1348 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:1352 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:1356 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:1360 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:1364 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:1368 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:1372 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:1376 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:1380 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:1384 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:1388 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:1392 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:1396 ; 4-byte Folded Spill
; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
; CHECK-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v1
; CHECK-NEXT: s_waitcnt vmcnt(32)
; CHECK-NEXT: buffer_store_dword a0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword a1, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a2, off, s[0:3], s32 offset:1008 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a3, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a4, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a5, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a6, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a7, off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a8, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a9, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a10, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a11, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a12, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a13, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a14, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a15, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a16, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a17, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a18, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a19, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a20, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a21, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a22, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a23, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a24, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a25, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a26, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a27, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a28, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a29, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a30, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword a31, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:412
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:408
; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:416
; CHECK-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:404
; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:420
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:400
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:396
; CHECK-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:424
; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:440
; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:436
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:444
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1472 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1476 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:432
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:448
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1480 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1484 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:428
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:452
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1488 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1492 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:784
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:780
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:8
; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:4
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:456
; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:584
; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:580
; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:576
; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:572
; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:568
; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:564
; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:560
; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:556
; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:552
; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:548
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:544
; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:540
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:536
; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:532
; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:528
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:524
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1504 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1508 ; 4-byte Folded Spill
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1512 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1516 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1520 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1524 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1528 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1532 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1536 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:1540 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:1544 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:1548 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:1552 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:1556 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:1560 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:1564 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:1568 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:1572 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:1576 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:1580 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:1584 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:1588 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:1592 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:1596 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:1600 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:1604 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:1608 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:1612 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:1616 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:1620 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:1624 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:1628 ; 4-byte Folded Spill
; CHECK-NEXT: v_cmp_lt_i32_e64 s[38:39], 2, v0
; CHECK-NEXT: s_and_saveexec_b64 s[50:51], s[38:39]
; CHECK-NEXT: s_xor_b64 s[50:51], exec, s[50:51]
; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.1: ; %LeafBlock46
; CHECK-NEXT: v_cmp_eq_u32_e64 s[38:39], 3, v0
; CHECK-NEXT: s_mov_b64 s[54:55], -1
; CHECK-NEXT: s_and_saveexec_b64 s[52:53], s[38:39]
; CHECK-NEXT: s_cbranch_execz .LBB0_3
; CHECK-NEXT: ; %bb.2: ; %bb2
; CHECK-NEXT: v_accvgpr_write_b32 a10, v42
; CHECK-NEXT: v_accvgpr_write_b32 a14, v48
; CHECK-NEXT: v_accvgpr_write_b32 a4, v56
; CHECK-NEXT: v_accvgpr_write_b32 a2, v50
; CHECK-NEXT: v_accvgpr_write_b32 a11, v43
; CHECK-NEXT: v_accvgpr_write_b32 a8, v40
; CHECK-NEXT: v_accvgpr_write_b32 a15, v49
; CHECK-NEXT: v_accvgpr_write_b32 a5, v57
; CHECK-NEXT: v_accvgpr_write_b32 a3, v51
; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload
; CHECK-NEXT: v_accvgpr_write_b32 a16, v58
; CHECK-NEXT: v_accvgpr_write_b32 a17, v59
; CHECK-NEXT: v_accvgpr_read_b32 v60, a36
; CHECK-NEXT: v_accvgpr_read_b32 v58, a40
; CHECK-NEXT: s_xor_b64 s[54:55], exec, -1
; CHECK-NEXT: s_waitcnt vmcnt(29)
; CHECK-NEXT: v_xor_b32_e32 v28, v28, v18
; CHECK-NEXT: s_waitcnt vmcnt(28)
; CHECK-NEXT: v_xor_b32_e32 v0, v29, v3
; CHECK-NEXT: v_accvgpr_write_b32 a9, v0
; CHECK-NEXT: v_accvgpr_read_b32 v29, a27
; CHECK-NEXT: s_waitcnt vmcnt(26)
; CHECK-NEXT: v_xor_b32_e32 v29, v31, v29
; CHECK-NEXT: s_waitcnt vmcnt(24)
; CHECK-NEXT: v_xor_b32_e32 v0, v33, v7
; CHECK-NEXT: v_accvgpr_write_b32 a27, v0
; CHECK-NEXT: v_xor_b32_e32 v0, v30, v4
; CHECK-NEXT: v_accvgpr_write_b32 a12, v0
; CHECK-NEXT: s_waitcnt vmcnt(20)
; CHECK-NEXT: v_xor_b32_e32 v62, v37, v11
; CHECK-NEXT: v_xor_b32_e32 v0, v32, v6
; CHECK-NEXT: v_xor_b32_e32 v32, v34, v8
; CHECK-NEXT: v_xor_b32_e32 v34, v36, v10
; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
; CHECK-NEXT: v_accvgpr_read_b32 v31, a39
; CHECK-NEXT: v_accvgpr_read_b32 v30, a46
; CHECK-NEXT: v_xor_b32_e32 v33, v35, v9
; CHECK-NEXT: s_waitcnt vmcnt(20)
; CHECK-NEXT: v_xor_b32_e32 v35, v39, v13
; CHECK-NEXT: s_waitcnt vmcnt(15)
; CHECK-NEXT: v_xor_b32_e32 v10, v44, v30
; CHECK-NEXT: v_accvgpr_read_b32 v30, a44
; CHECK-NEXT: s_waitcnt vmcnt(11)
; CHECK-NEXT: v_xor_b32_e32 v6, v48, v30
; CHECK-NEXT: v_accvgpr_read_b32 v30, a42
; CHECK-NEXT: v_xor_b32_e32 v9, v47, v21
; CHECK-NEXT: s_waitcnt vmcnt(8)
; CHECK-NEXT: v_xor_b32_e32 v21, v51, v25
; CHECK-NEXT: v_accvgpr_write_b32 a36, v0
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: v_xor_b32_e32 v13, v53, v31
; CHECK-NEXT: v_accvgpr_read_b32 v31, a29
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: v_xor_b32_e32 v1, v55, v31
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: v_xor_b32_e32 v25, v56, v60
; CHECK-NEXT: v_xor_b32_e32 v8, v46, v20
; CHECK-NEXT: v_xor_b32_e32 v20, v50, v30
; CHECK-NEXT: v_accvgpr_read_b32 v30, a38
; CHECK-NEXT: v_xor_b32_e32 v0, v54, v58
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; CHECK-NEXT: v_xor_b32_e32 v11, v45, v19
; CHECK-NEXT: v_xor_b32_e32 v19, v27, v5
; CHECK-NEXT: v_xor_b32_e32 v4, v52, v30
; CHECK-NEXT: v_xor_b32_e32 v30, v26, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v5, 17, v25
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 14, v[0:1]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v5
; CHECK-NEXT: v_and_b32_e32 v5, 0x7fffffff, v13
; CHECK-NEXT: v_xor_b32_e32 v18, v38, v12
; CHECK-NEXT: v_lshlrev_b32_e32 v12, 18, v0
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 13, v[4:5]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v12
; CHECK-NEXT: v_xor_b32_e32 v7, v49, v23
; CHECK-NEXT: v_and_b32_e32 v21, 0x7fffffff, v21
; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7
; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9
; CHECK-NEXT: v_and_b32_e32 v11, 0x7fffffff, v11
; CHECK-NEXT: v_accvgpr_read_b32 v31, a37
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: v_xor_b32_e32 v23, v57, v31
; CHECK-NEXT: v_xor_b32_e32 v17, v43, v17
; CHECK-NEXT: v_xor_b32_e32 v16, v42, v16
; CHECK-NEXT: v_and_b32_e32 v17, 0x7fffffff, v17
; CHECK-NEXT: v_xor_b32_e32 v15, v41, v15
; CHECK-NEXT: v_xor_b32_e32 v14, v40, v14
; CHECK-NEXT: v_and_b32_e32 v15, 0x7fffffff, v15
; CHECK-NEXT: v_accvgpr_read_b32 v57, a5
; CHECK-NEXT: v_accvgpr_read_b32 v43, a11
; CHECK-NEXT: v_accvgpr_read_b32 v49, a15
; CHECK-NEXT: v_accvgpr_read_b32 v51, a3
; CHECK-NEXT: v_accvgpr_read_b32 v59, a17
; CHECK-NEXT: v_accvgpr_read_b32 v56, a4
; CHECK-NEXT: v_accvgpr_read_b32 v42, a10
; CHECK-NEXT: v_accvgpr_read_b32 v40, a8
; CHECK-NEXT: v_accvgpr_read_b32 v48, a14
; CHECK-NEXT: v_accvgpr_read_b32 v50, a2
; CHECK-NEXT: v_accvgpr_read_b32 v58, a16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: v_add_co_u32_e64 v12, s[38:39], 24, v36
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_addc_co_u32_e64 v13, s[38:39], 0, v37, s[38:39]
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:80
; CHECK-NEXT: s_movk_i32 s38, 0x7c
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 19, v4
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 12, v[20:21]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 20, v20
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 11, v[6:7]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v4
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:64
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 22, v8
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 21, v6
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 10, v[8:9]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v0
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 9, v[10:11]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v4
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:48
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 26, v18
; CHECK-NEXT: v_add_co_u32_e64 v0, s[38:39], s38, v36
; CHECK-NEXT: v_addc_co_u32_e64 v1, s[38:39], 0, v37, s[38:39]
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 15, v23
; CHECK-NEXT: s_movk_i32 s38, 0x78
; CHECK-NEXT: flat_store_short v[0:1], v2
; CHECK-NEXT: v_add_co_u32_e64 v0, s[38:39], s38, v36
; CHECK-NEXT: v_alignbit_b32 v2, v23, v25, 15
; CHECK-NEXT: v_addc_co_u32_e64 v1, s[38:39], 0, v37, s[38:39]
; CHECK-NEXT: flat_store_dword v[0:1], v2
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffffff, v19
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v28
; CHECK-NEXT: v_or_b32_e32 v31, v0, v1
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 23, v10
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 8, v[16:17]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: flat_store_dwordx2 v[36:37], v[0:1] offset:64
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 24, v16
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 7, v[14:15]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: v_and_b32_e32 v19, 0x7fffffff, v35
; CHECK-NEXT: flat_store_dwordx2 v[12:13], v[0:1] offset:32
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 25, v14
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 6, v[18:19]
; CHECK-NEXT: v_and_b32_e32 v35, 0x7fffffff, v62
; CHECK-NEXT: v_or_b32_e32 v3, v3, v0
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 5, v[34:35]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v4
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[0:3] offset:16
; CHECK-NEXT: v_accvgpr_read_b32 v6, a36
; CHECK-NEXT: v_mov_b32_e32 v0, v33
; CHECK-NEXT: v_and_b32_e32 v33, 0x7fffffff, v0
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 4, v[32:33]
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 27, v34
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: flat_store_dwordx2 v[36:37], v[0:1] offset:32
; CHECK-NEXT: v_accvgpr_read_b32 v0, a27
; CHECK-NEXT: v_accvgpr_read_b32 v2, a36
; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v0
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 3, v[2:3]
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 28, v32
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: flat_store_dwordx2 v[12:13], v[0:1]
; CHECK-NEXT: v_mov_b32_e32 v0, v29
; CHECK-NEXT: v_accvgpr_read_b32 v2, a12
; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v0
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[2:3]
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v6
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: flat_store_dwordx2 v[36:37], v[0:1] offset:16
; CHECK-NEXT: v_accvgpr_read_b32 v0, a9
; CHECK-NEXT: v_accvgpr_read_b32 v4, a12
; CHECK-NEXT: v_and_b32_e32 v29, 0x7fffffff, v0
; CHECK-NEXT: v_lshrrev_b64 v[32:33], 1, v[28:29]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v4
; CHECK-NEXT: v_or_b32_e32 v33, v33, v0
; CHECK-NEXT: flat_store_dwordx4 v[36:37], v[30:33]
; CHECK-NEXT: .LBB0_3: ; %Flow52
; CHECK-NEXT: s_or_b64 exec, exec, s[52:53]
; CHECK-NEXT: s_and_b64 s[52:53], s[54:55], exec
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_4: ; %Flow51
; CHECK-NEXT: s_andn2_saveexec_b64 s[50:51], s[50:51]
; CHECK-NEXT: ; %bb.5: ; %LeafBlock
; CHECK-NEXT: v_cmp_ne_u32_e64 s[38:39], 2, v0
; CHECK-NEXT: s_andn2_b64 s[48:49], s[52:53], exec
; CHECK-NEXT: s_and_b64 s[38:39], s[38:39], exec
; CHECK-NEXT: s_or_b64 s[52:53], s[48:49], s[38:39]
; CHECK-NEXT: s_mov_b64 s[48:49], exec
; CHECK-NEXT: ; %bb.6: ; %Flow53
; CHECK-NEXT: s_or_b64 exec, exec, s[50:51]
; CHECK-NEXT: s_mov_b64 s[38:39], exec
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload
; CHECK-NEXT: s_and_b64 s[50:51], s[38:39], s[52:53]
; CHECK-NEXT: s_mov_b64 exec, s[50:51]
; CHECK-NEXT: s_cbranch_execz .LBB0_44
; CHECK-NEXT: ; %bb.7: ; %bb0
; CHECK-NEXT: s_and_saveexec_b64 s[50:51], s[46:47]
; CHECK-NEXT: s_xor_b64 s[50:51], exec, s[50:51]
; CHECK-NEXT: s_cbranch_execz .LBB0_41
; CHECK-NEXT: ; %bb.8: ; %bb0b
; CHECK-NEXT: s_mov_b64 s[52:53], exec
; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1456 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1460 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:1464 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:1468 ; 4-byte Folded Reload
; CHECK-NEXT: s_and_b64 s[36:37], s[52:53], s[36:37]
; CHECK-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr9
; CHECK-NEXT: s_mov_b64 exec, s[36:37]
; CHECK-NEXT: s_cbranch_execz .LBB0_10
; CHECK-NEXT: ; %bb.9: ; %cond.load
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[0:1]
; CHECK-NEXT: .LBB0_10: ; %else
; CHECK-NEXT: s_or_b64 exec, exec, s[52:53]
; CHECK-NEXT: s_mov_b64 s[36:37], exec
; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:1448 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:1452 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:1440 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:1444 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:1424 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:1428 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:1432 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:1436 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1416 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1420 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:1408 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:1412 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1496 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1500 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:1400 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:1404 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1264 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1268 ; 4-byte Folded Reload
; CHECK-NEXT: s_and_b64 s[34:35], s[36:37], s[34:35]
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: s_cbranch_execz .LBB0_12
; CHECK-NEXT: ; %bb.11: ; %cond.load1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[10:11], v[0:1] offset:8
; CHECK-NEXT: .LBB0_12: ; %else2
; CHECK-NEXT: s_or_b64 exec, exec, s[36:37]
; CHECK-NEXT: s_and_saveexec_b64 s[34:35], s[30:31]
; CHECK-NEXT: s_cbranch_execz .LBB0_14
; CHECK-NEXT: ; %bb.13: ; %cond.load4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[12:13], v[0:1] offset:16
; CHECK-NEXT: .LBB0_14: ; %else5
; CHECK-NEXT: s_or_b64 exec, exec, s[34:35]
; CHECK-NEXT: s_and_saveexec_b64 s[30:31], s[94:95]
; CHECK-NEXT: s_cbranch_execz .LBB0_16
; CHECK-NEXT: ; %bb.15: ; %cond.load7
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[14:15], v[0:1] offset:24
; CHECK-NEXT: .LBB0_16: ; %else8
; CHECK-NEXT: s_or_b64 exec, exec, s[30:31]
; CHECK-NEXT: s_and_saveexec_b64 s[94:95], s[92:93]
; CHECK-NEXT: s_cbranch_execz .LBB0_18
; CHECK-NEXT: ; %bb.17: ; %cond.load10
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[22:23], v[0:1] offset:32
; CHECK-NEXT: .LBB0_18: ; %else11
; CHECK-NEXT: s_or_b64 exec, exec, s[94:95]
; CHECK-NEXT: s_and_saveexec_b64 s[92:93], s[90:91]
; CHECK-NEXT: s_cbranch_execz .LBB0_20
; CHECK-NEXT: ; %bb.19: ; %cond.load13
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[16:17], v[0:1] offset:40
; CHECK-NEXT: .LBB0_20: ; %else14
; CHECK-NEXT: s_or_b64 exec, exec, s[92:93]
; CHECK-NEXT: s_and_saveexec_b64 s[90:91], s[88:89]
; CHECK-NEXT: s_cbranch_execz .LBB0_22
; CHECK-NEXT: ; %bb.21: ; %cond.load16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[24:25], v[0:1] offset:48
; CHECK-NEXT: .LBB0_22: ; %else17
; CHECK-NEXT: s_or_b64 exec, exec, s[90:91]
; CHECK-NEXT: s_and_saveexec_b64 s[88:89], s[78:79]
; CHECK-NEXT: s_cbranch_execz .LBB0_24
; CHECK-NEXT: ; %bb.23: ; %cond.load19
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[18:19], v[0:1] offset:56
; CHECK-NEXT: .LBB0_24: ; %else20
; CHECK-NEXT: s_or_b64 exec, exec, s[88:89]
; CHECK-NEXT: s_and_saveexec_b64 s[78:79], s[76:77]
; CHECK-NEXT: s_cbranch_execz .LBB0_26
; CHECK-NEXT: ; %bb.25: ; %cond.load22
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[26:27], v[0:1] offset:64
; CHECK-NEXT: .LBB0_26: ; %else23
; CHECK-NEXT: s_or_b64 exec, exec, s[78:79]
; CHECK-NEXT: s_and_saveexec_b64 s[76:77], s[74:75]
; CHECK-NEXT: s_cbranch_execz .LBB0_28
; CHECK-NEXT: ; %bb.27: ; %cond.load25
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[20:21], v[0:1] offset:72
; CHECK-NEXT: .LBB0_28: ; %else26
; CHECK-NEXT: s_or_b64 exec, exec, s[76:77]
; CHECK-NEXT: s_and_saveexec_b64 s[74:75], s[72:73]
; CHECK-NEXT: s_cbranch_execz .LBB0_30
; CHECK-NEXT: ; %bb.29: ; %cond.load28
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[28:29], v[0:1] offset:80
; CHECK-NEXT: .LBB0_30: ; %else29
; CHECK-NEXT: s_or_b64 exec, exec, s[74:75]
; CHECK-NEXT: s_and_saveexec_b64 s[72:73], s[62:63]
; CHECK-NEXT: s_cbranch_execz .LBB0_32
; CHECK-NEXT: ; %bb.31: ; %cond.load31
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[32:33], v[0:1] offset:88
; CHECK-NEXT: .LBB0_32: ; %else32
; CHECK-NEXT: s_or_b64 exec, exec, s[72:73]
; CHECK-NEXT: s_and_saveexec_b64 s[62:63], s[60:61]
; CHECK-NEXT: s_cbranch_execz .LBB0_34
; CHECK-NEXT: ; %bb.33: ; %cond.load34
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[30:31], v[0:1] offset:96
; CHECK-NEXT: .LBB0_34: ; %else35
; CHECK-NEXT: s_or_b64 exec, exec, s[62:63]
; CHECK-NEXT: s_and_saveexec_b64 s[60:61], s[58:59]
; CHECK-NEXT: s_cbranch_execz .LBB0_36
; CHECK-NEXT: ; %bb.35: ; %cond.load37
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[0:1] offset:104
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill
; CHECK-NEXT: .LBB0_36: ; %else38
; CHECK-NEXT: s_or_b64 exec, exec, s[60:61]
; CHECK-NEXT: s_and_saveexec_b64 s[58:59], s[56:57]
; CHECK-NEXT: s_cbranch_execz .LBB0_38
; CHECK-NEXT: ; %bb.37: ; %cond.load40
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 a[26:27], v[0:1] offset:112
; CHECK-NEXT: .LBB0_38: ; %else41
; CHECK-NEXT: s_or_b64 exec, exec, s[58:59]
; CHECK-NEXT: s_and_saveexec_b64 s[56:57], vcc
; CHECK-NEXT: s_cbranch_execz .LBB0_40
; CHECK-NEXT: ; %bb.39: ; %cond.load43
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_load_dwordx2 a[28:29], v[0:1] offset:120
; CHECK-NEXT: .LBB0_40: ; %else44
; CHECK-NEXT: s_or_b64 exec, exec, s[56:57]
; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_accvgpr_read_b32 v4, a28
; CHECK-NEXT: v_accvgpr_read_b32 v5, a29
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 15, v5
; CHECK-NEXT: v_and_b32_e32 v31, 0x7fffffff, v31
; CHECK-NEXT: v_and_b32_e32 v33, 0x7fffffff, v33
; CHECK-NEXT: v_and_b32_e32 v29, 0x7fffffff, v29
; CHECK-NEXT: v_and_b32_e32 v21, 0x7fffffff, v21
; CHECK-NEXT: v_and_b32_e32 v27, 0x7fffffff, v27
; CHECK-NEXT: v_and_b32_e32 v19, 0x7fffffff, v19
; CHECK-NEXT: v_and_b32_e32 v25, 0x7fffffff, v25
; CHECK-NEXT: v_and_b32_e32 v17, 0x7fffffff, v17
; CHECK-NEXT: v_and_b32_e32 v23, 0x7fffffff, v23
; CHECK-NEXT: v_and_b32_e32 v15, 0x7fffffff, v15
; CHECK-NEXT: v_and_b32_e32 v13, 0x7fffffff, v13
; CHECK-NEXT: v_and_b32_e32 v11, 0x7fffffff, v11
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x7c, v34
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v35, vcc
; CHECK-NEXT: flat_store_short v[0:1], v2
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x78, v34
; CHECK-NEXT: v_alignbit_b32 v2, v5, v4, 15
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v35, vcc
; CHECK-NEXT: flat_store_dword v[0:1], v2
; CHECK-NEXT: v_accvgpr_read_b32 v2, a26
; CHECK-NEXT: v_accvgpr_read_b32 v3, a27
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 31, v10
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v9
; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
; CHECK-NEXT: v_or_b32_e32 v9, v1, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 17, v4
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 14, v[2:3]
; CHECK-NEXT: v_and_b32_e32 v37, 0x7fffffff, v37
; CHECK-NEXT: v_or_b32_e32 v5, v5, v0
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 24, v34
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 18, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 13, v[36:37]
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v35, vcc
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:80
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v30
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 19, v36
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 12, v[30:31]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 11, v[32:33]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:64
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v28
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v32
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 10, v[28:29]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 9, v[20:21]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:48
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 26, v24
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v20
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 8, v[26:27]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[34:35], v[2:3] offset:64
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 24, v26
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 7, v[18:19]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 25, v18
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 6, v[24:25]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 5, v[16:17]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
; CHECK-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 27, v16
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 4, v[22:23]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[34:35], v[2:3] offset:32
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v22
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 3, v[14:15]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v14
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[12:13]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: flat_store_dwordx2 v[34:35], v[0:1] offset:16
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v12
; CHECK-NEXT: v_or_b32_e32 v11, v11, v0
; CHECK-NEXT: flat_store_dwordx4 v[34:35], v[8:11]
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: .LBB0_41: ; %Flow
; CHECK-NEXT: s_andn2_saveexec_b64 s[56:57], s[50:51]
; CHECK-NEXT: s_cbranch_execz .LBB0_43
; CHECK-NEXT: ; %bb.42: ; %bb0a
; CHECK-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v11, 0x7fffffff, v11
; CHECK-NEXT: v_and_b32_e32 v13, 0x7fffffff, v13
; CHECK-NEXT: v_and_b32_e32 v15, 0x7fffffff, v15
; CHECK-NEXT: v_and_b32_e32 v17, 0x7fffffff, v17
; CHECK-NEXT: v_and_b32_e32 v19, 0x7fffffff, v19
; CHECK-NEXT: v_and_b32_e32 v21, 0x7fffffff, v21
; CHECK-NEXT: v_and_b32_e32 v23, 0x7fffffff, v23
; CHECK-NEXT: v_and_b32_e32 v25, 0x7fffffff, v25
; CHECK-NEXT: v_and_b32_e32 v27, 0x7fffffff, v27
; CHECK-NEXT: v_and_b32_e32 v29, 0x7fffffff, v29
; CHECK-NEXT: v_and_b32_e32 v31, 0x7fffffff, v31
; CHECK-NEXT: v_and_b32_e32 v33, 0x7fffffff, v33
; CHECK-NEXT: v_and_b32_e32 v35, 0x7fffffff, v35
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 18, v36
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x7c, v52
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v53, vcc
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 15, v39
; CHECK-NEXT: flat_store_short v[0:1], v2
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x78, v52
; CHECK-NEXT: v_alignbit_b32 v2, v39, v38, 15
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v53, vcc
; CHECK-NEXT: flat_store_dword v[0:1], v2
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 31, v10
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v9
; CHECK-NEXT: v_and_b32_e32 v37, 0x7fffffff, v37
; CHECK-NEXT: v_or_b32_e32 v9, v1, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 17, v38
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 14, v[36:37]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v0
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 24, v52
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 13, v[34:35]
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v53, vcc
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:80
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 19, v34
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 12, v[32:33]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 11, v[30:31]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:64
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v28
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v30
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 10, v[28:29]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 9, v[26:27]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:48
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 26, v20
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v26
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 8, v[24:25]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[52:53], v[2:3] offset:64
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 24, v24
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 7, v[22:23]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 25, v22
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 6, v[20:21]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 5, v[18:19]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
; CHECK-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 27, v18
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 4, v[16:17]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[52:53], v[2:3] offset:32
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v16
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 3, v[14:15]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v14
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[12:13]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: flat_store_dwordx2 v[52:53], v[0:1] offset:16
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v12
; CHECK-NEXT: v_or_b32_e32 v11, v11, v0
; CHECK-NEXT: flat_store_dwordx4 v[52:53], v[8:11]
; CHECK-NEXT: .LBB0_43: ; %Flow48
; CHECK-NEXT: s_or_b64 exec, exec, s[56:57]
; CHECK-NEXT: s_andn2_b64 s[48:49], s[48:49], exec
; CHECK-NEXT: .LBB0_44: ; %Flow54
; CHECK-NEXT: s_or_b64 exec, exec, s[38:39]
; CHECK-NEXT: s_and_saveexec_b64 s[56:57], s[48:49]
; CHECK-NEXT: s_cbranch_execz .LBB0_49
; CHECK-NEXT: ; %bb.45: ; %bb1
; CHECK-NEXT: s_and_saveexec_b64 s[58:59], s[46:47]
; CHECK-NEXT: s_xor_b64 s[46:47], exec, s[58:59]
; CHECK-NEXT: s_cbranch_execz .LBB0_47
; CHECK-NEXT: ; %bb.46: ; %bb1b
; CHECK-NEXT: v_accvgpr_read_b32 v31, a31
; CHECK-NEXT: v_accvgpr_read_b32 v32, a30
; CHECK-NEXT: v_lshrrev_b32_e32 v33, 15, v31
; CHECK-NEXT: v_alignbit_b32 v36, v31, v32, 15
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1504 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:1508 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:1512 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:1516 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:1520 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:1524 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:1528 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:1532 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1536 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1540 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:1544 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:1548 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:1552 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:1556 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:1560 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:1564 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:1568 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:1572 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:1576 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:1580 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:1584 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:1588 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:1592 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:1596 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:1600 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:1604 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:1608 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:1612 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:1616 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:1620 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:1624 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:1628 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:1488 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:1492 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:1480 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:1484 ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 17, v32
; CHECK-NEXT: v_and_b32_e32 v59, 0x7fffffff, v59
; CHECK-NEXT: v_and_b32_e32 v51, 0x7fffffff, v51
; CHECK-NEXT: v_and_b32_e32 v57, 0x7fffffff, v57
; CHECK-NEXT: v_and_b32_e32 v49, 0x7fffffff, v49
; CHECK-NEXT: v_and_b32_e32 v43, 0x7fffffff, v43
; CHECK-NEXT: v_and_b32_e32 v4, 0x7fffffff, v1
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x7c, v10
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v11, vcc
; CHECK-NEXT: flat_store_short v[0:1], v33
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 0x78, v10
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v11, vcc
; CHECK-NEXT: flat_store_dword v[0:1], v36
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 31, v42
; CHECK-NEXT: v_and_b32_e32 v3, 0x7fffffff, v3
; CHECK-NEXT: v_or_b32_e32 v41, v4, v0
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 14, v[2:3]
; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9
; CHECK-NEXT: v_or_b32_e32 v5, v5, v6
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 18, v2
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 13, v[8:9]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:1472 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:1476 ; 4-byte Folded Reload
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 24, v10
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v11, vcc
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:80
; CHECK-NEXT: v_lshrrev_b64 v[42:43], 1, v[42:43]
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 19, v8
; CHECK-NEXT: v_accvgpr_read_b32 v8, a32
; CHECK-NEXT: v_accvgpr_read_b32 v9, a33
; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 12, v[6:7]
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v6
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 11, v[8:9]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: v_accvgpr_read_b32 v6, a34
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:64
; CHECK-NEXT: v_accvgpr_read_b32 v7, a35
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v8
; CHECK-NEXT: v_accvgpr_read_b32 v8, a24
; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7
; CHECK-NEXT: v_accvgpr_read_b32 v9, a25
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 10, v[6:7]
; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v6
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 9, v[8:9]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:48
; CHECK-NEXT: v_accvgpr_read_b32 v6, a22
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v8
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 8, v[58:59]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[10:11], v[2:3] offset:64
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 24, v58
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 7, v[50:51]
; CHECK-NEXT: v_accvgpr_read_b32 v7, a23
; CHECK-NEXT: v_accvgpr_read_b32 v8, a20
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7
; CHECK-NEXT: v_accvgpr_read_b32 v9, a21
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:32
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 25, v50
; CHECK-NEXT: v_lshrrev_b64 v[4:5], 6, v[6:7]
; CHECK-NEXT: v_and_b32_e32 v9, 0x7fffffff, v9
; CHECK-NEXT: v_or_b32_e32 v5, v5, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 26, v6
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 5, v[8:9]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v6
; CHECK-NEXT: v_accvgpr_read_b32 v6, a6
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
; CHECK-NEXT: v_accvgpr_read_b32 v7, a7
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 27, v8
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 4, v[56:57]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: v_and_b32_e32 v7, 0x7fffffff, v7
; CHECK-NEXT: flat_store_dwordx2 v[10:11], v[2:3] offset:32
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v56
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 3, v[6:7]
; CHECK-NEXT: v_or_b32_e32 v3, v3, v4
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 29, v6
; CHECK-NEXT: v_lshrrev_b64 v[0:1], 2, v[48:49]
; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
; CHECK-NEXT: flat_store_dwordx2 v[10:11], v[0:1] offset:16
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 30, v48
; CHECK-NEXT: v_or_b32_e32 v43, v43, v0
; CHECK-NEXT: flat_store_dwordx4 v[10:11], v[40:43]
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: ; kill: killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; CHECK-NEXT: .LBB0_47: ; %Flow49
; CHECK-NEXT: s_andn2_saveexec_b64 s[46:47], s[46:47]
; CHECK-NEXT: s_cbranch_execz .LBB0_49
; CHECK-NEXT: ; %bb.48: ; %bb1a
; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a33, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a35, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a37, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a39, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a41, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:1176 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a43, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a45, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a47, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a48, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a49, off, s[0:3], s32 offset:1204 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a50, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a51, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a52, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a53, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a54, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a55, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a56, off, s[0:3], s32 offset:1232 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a57, off, s[0:3], s32 offset:1236 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a58, off, s[0:3], s32 offset:1240 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a59, off, s[0:3], s32 offset:1244 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a60, off, s[0:3], s32 offset:1248 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a61, off, s[0:3], s32 offset:1252 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a62, off, s[0:3], s32 offset:1256 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a63, off, s[0:3], s32 offset:1260 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a0, off, s[0:3], s32 offset:1272 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a1, off, s[0:3], s32 offset:1276 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a2, off, s[0:3], s32 offset:1280 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a3, off, s[0:3], s32 offset:1284 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a4, off, s[0:3], s32 offset:1288 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a5, off, s[0:3], s32 offset:1292 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a6, off, s[0:3], s32 offset:1296 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a7, off, s[0:3], s32 offset:1300 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a8, off, s[0:3], s32 offset:1304 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a9, off, s[0:3], s32 offset:1308 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a10, off, s[0:3], s32 offset:1312 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a11, off, s[0:3], s32 offset:1316 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a12, off, s[0:3], s32 offset:1320 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a13, off, s[0:3], s32 offset:1324 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a14, off, s[0:3], s32 offset:1328 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a15, off, s[0:3], s32 offset:1332 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a16, off, s[0:3], s32 offset:1336 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a17, off, s[0:3], s32 offset:1340 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a18, off, s[0:3], s32 offset:1344 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a19, off, s[0:3], s32 offset:1348 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a20, off, s[0:3], s32 offset:1352 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a21, off, s[0:3], s32 offset:1356 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a22, off, s[0:3], s32 offset:1360 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a23, off, s[0:3], s32 offset:1364 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a24, off, s[0:3], s32 offset:1368 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a25, off, s[0:3], s32 offset:1372 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a26, off, s[0:3], s32 offset:1376 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a27, off, s[0:3], s32 offset:1380 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a28, off, s[0:3], s32 offset:1384 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a29, off, s[0:3], s32 offset:1388 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a30, off, s[0:3], s32 offset:1392 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a31, off, s[0:3], s32 offset:1396 ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
; CHECK-NEXT: v_accvgpr_read_b32 v30, a30
; CHECK-NEXT: v_accvgpr_read_b32 v31, a31
; CHECK-NEXT: v_accvgpr_read_b32 v30, a32
; CHECK-NEXT: v_accvgpr_read_b32 v28, a28
; CHECK-NEXT: v_accvgpr_read_b32 v31, a33
; CHECK-NEXT: v_accvgpr_read_b32 v32, a34
; CHECK-NEXT: v_accvgpr_read_b32 v33, a35
; CHECK-NEXT: v_accvgpr_read_b32 v34, a36
; CHECK-NEXT: v_accvgpr_read_b32 v36, a38
; CHECK-NEXT: v_accvgpr_read_b32 v32, a28
; CHECK-NEXT: v_accvgpr_read_b32 v28, a60
; CHECK-NEXT: v_accvgpr_read_b32 v30, a62
; CHECK-NEXT: v_accvgpr_read_b32 v20, a20
; CHECK-NEXT: v_accvgpr_read_b32 v21, a21
; CHECK-NEXT: v_accvgpr_read_b32 v22, a22
; CHECK-NEXT: v_accvgpr_read_b32 v23, a23
; CHECK-NEXT: v_accvgpr_read_b32 v24, a24
; CHECK-NEXT: v_accvgpr_read_b32 v25, a25
; CHECK-NEXT: v_accvgpr_read_b32 v26, a26
; CHECK-NEXT: v_accvgpr_read_b32 v27, a27
; CHECK-NEXT: v_accvgpr_read_b32 v29, a29
; CHECK-NEXT: v_accvgpr_read_b32 v37, a39
; CHECK-NEXT: v_accvgpr_read_b32 v38, a40
; CHECK-NEXT: v_accvgpr_read_b32 v39, a41
; CHECK-NEXT: v_accvgpr_read_b32 v59, a61
; CHECK-NEXT: v_cndmask_b32_e64 v36, v28, v32, s[44:45]
; CHECK-NEXT: v_accvgpr_read_b32 v33, a31
; CHECK-NEXT: v_accvgpr_read_b32 v31, a63
; CHECK-NEXT: v_accvgpr_read_b32 v34, a30
; CHECK-NEXT: v_accvgpr_read_b32 v30, a62
; CHECK-NEXT: v_cndmask_b32_e64 v37, v59, v29, s[44:45]
; CHECK-NEXT: v_cndmask_b32_e64 v39, v31, v33, s[40:41]
; CHECK-NEXT: v_cndmask_b32_e64 v38, v30, v34, s[40:41]
; CHECK-NEXT: v_accvgpr_read_b32 v31, a27
; CHECK-NEXT: v_accvgpr_read_b32 v16, a16
; CHECK-NEXT: v_accvgpr_read_b32 v17, a17
; CHECK-NEXT: v_accvgpr_read_b32 v18, a18
; CHECK-NEXT: v_accvgpr_read_b32 v19, a19
; CHECK-NEXT: v_accvgpr_read_b32 v27, a23
; CHECK-NEXT: v_accvgpr_read_b32 v26, a22
; CHECK-NEXT: v_accvgpr_read_b32 v25, a21
; CHECK-NEXT: v_accvgpr_read_b32 v24, a20
; CHECK-NEXT: v_accvgpr_read_b32 v23, a19
; CHECK-NEXT: v_accvgpr_read_b32 v22, a18
; CHECK-NEXT: v_accvgpr_read_b32 v21, a17
; CHECK-NEXT: v_accvgpr_read_b32 v20, a16
; CHECK-NEXT: v_accvgpr_read_b32 v16, a48
; CHECK-NEXT: v_accvgpr_read_b32 v49, a51
; CHECK-NEXT: v_accvgpr_read_b32 v29, a25
; CHECK-NEXT: v_accvgpr_read_b32 v28, a24
; CHECK-NEXT: v_accvgpr_read_b32 v20, a52
; CHECK-NEXT: v_accvgpr_read_b32 v21, a53
; CHECK-NEXT: v_accvgpr_read_b32 v22, a54
; CHECK-NEXT: v_accvgpr_read_b32 v23, a55
; CHECK-NEXT: v_accvgpr_read_b32 v24, a56
; CHECK-NEXT: v_accvgpr_read_b32 v25, a57
; CHECK-NEXT: v_accvgpr_read_b32 v26, a58
; CHECK-NEXT: v_accvgpr_read_b32 v27, a59
; CHECK-NEXT: v_cndmask_b32_e64 v49, v25, v29, s[42:43]
; CHECK-NEXT: v_accvgpr_read_b32 v28, a24
; CHECK-NEXT: v_accvgpr_read_b32 v17, a49
; CHECK-NEXT: v_accvgpr_read_b32 v18, a50
; CHECK-NEXT: v_accvgpr_read_b32 v19, a51
; CHECK-NEXT: v_accvgpr_read_b32 v27, a23
; CHECK-NEXT: v_accvgpr_read_b32 v26, a22
; CHECK-NEXT: v_accvgpr_read_b32 v24, a20
; CHECK-NEXT: v_accvgpr_read_b32 v23, a19
; CHECK-NEXT: v_accvgpr_read_b32 v22, a18
; CHECK-NEXT: v_accvgpr_read_b32 v21, a17
; CHECK-NEXT: v_accvgpr_read_b32 v20, a16
; CHECK-NEXT: v_accvgpr_read_b32 v32, a30
; CHECK-NEXT: v_accvgpr_read_b32 v16, a48
; CHECK-NEXT: v_accvgpr_read_b32 v26, a58
; CHECK-NEXT: v_accvgpr_read_b32 v48, a50
; CHECK-NEXT: v_accvgpr_read_b32 v50, a52
; CHECK-NEXT: v_accvgpr_read_b32 v51, a53
; CHECK-NEXT: v_accvgpr_read_b32 v30, a26
; CHECK-NEXT: v_accvgpr_read_b32 v25, a21
; CHECK-NEXT: v_accvgpr_read_b32 v17, a49
; CHECK-NEXT: v_accvgpr_read_b32 v18, a50
; CHECK-NEXT: v_accvgpr_read_b32 v19, a51
; CHECK-NEXT: v_accvgpr_read_b32 v20, a52
; CHECK-NEXT: v_accvgpr_read_b32 v21, a53
; CHECK-NEXT: v_accvgpr_read_b32 v22, a54
; CHECK-NEXT: v_accvgpr_read_b32 v23, a55
; CHECK-NEXT: v_accvgpr_read_b32 v24, a56
; CHECK-NEXT: v_accvgpr_read_b32 v33, a27
; CHECK-NEXT: v_accvgpr_read_b32 v27, a59
; CHECK-NEXT: v_accvgpr_read_b32 v34, a26
; CHECK-NEXT: v_cndmask_b32_e64 v48, v24, v28, s[42:43]
; CHECK-NEXT: v_cndmask_b32_e64 v51, v27, v33, s[26:27]
; CHECK-NEXT: v_cndmask_b32_e64 v50, v26, v34, s[26:27]
; CHECK-NEXT: v_accvgpr_read_b32 v31, a23
; CHECK-NEXT: v_accvgpr_read_b32 v16, a48
; CHECK-NEXT: v_accvgpr_read_b32 v53, a55
; CHECK-NEXT: v_accvgpr_read_b32 v29, a21
; CHECK-NEXT: v_accvgpr_read_b32 v24, a16
; CHECK-NEXT: v_accvgpr_read_b32 v20, a52
; CHECK-NEXT: v_accvgpr_read_b32 v21, a53
; CHECK-NEXT: v_accvgpr_read_b32 v22, a54
; CHECK-NEXT: v_accvgpr_read_b32 v23, a55
; CHECK-NEXT: v_cndmask_b32_e64 v53, v21, v29, s[28:29]
; CHECK-NEXT: v_accvgpr_read_b32 v24, a20
; CHECK-NEXT: v_accvgpr_read_b32 v17, a49
; CHECK-NEXT: v_accvgpr_read_b32 v18, a50
; CHECK-NEXT: v_accvgpr_read_b32 v19, a51
; CHECK-NEXT: v_accvgpr_read_b32 v20, a16
; CHECK-NEXT: v_accvgpr_read_b32 v32, a26
; CHECK-NEXT: v_accvgpr_read_b32 v23, a19
; CHECK-NEXT: v_accvgpr_read_b32 v22, a18
; CHECK-NEXT: v_accvgpr_read_b32 v16, a48
; CHECK-NEXT: v_accvgpr_read_b32 v30, a22
; CHECK-NEXT: v_accvgpr_read_b32 v28, a20
; CHECK-NEXT: v_accvgpr_read_b32 v18, a50
; CHECK-NEXT: v_accvgpr_read_b32 v19, a51
; CHECK-NEXT: v_accvgpr_read_b32 v33, a23
; CHECK-NEXT: v_accvgpr_read_b32 v22, a54
; CHECK-NEXT: v_accvgpr_read_b32 v52, a54
; CHECK-NEXT: v_accvgpr_read_b32 v54, a56
; CHECK-NEXT: v_accvgpr_read_b32 v55, a57
; CHECK-NEXT: v_accvgpr_read_b32 v20, a52
; CHECK-NEXT: v_accvgpr_read_b32 v32, a22
; CHECK-NEXT: v_accvgpr_read_b32 v23, a55
; CHECK-NEXT: v_accvgpr_read_b32 v31, a19
; CHECK-NEXT: v_accvgpr_read_b32 v34, a22
; CHECK-NEXT: v_accvgpr_read_b32 v18, a50
; CHECK-NEXT: v_accvgpr_read_b32 v40, a42
; CHECK-NEXT: v_accvgpr_read_b32 v41, a43
; CHECK-NEXT: v_accvgpr_read_b32 v42, a44
; CHECK-NEXT: v_accvgpr_read_b32 v43, a45
; CHECK-NEXT: v_accvgpr_read_b32 v27, a19
; CHECK-NEXT: v_accvgpr_read_b32 v26, a18
; CHECK-NEXT: v_accvgpr_read_b32 v25, a17
; CHECK-NEXT: v_accvgpr_read_b32 v21, a17
; CHECK-NEXT: v_accvgpr_read_b32 v17, a49
; CHECK-NEXT: v_cndmask_b32_e64 v52, v20, v24, s[28:29]
; CHECK-NEXT: v_cndmask_b32_e64 v55, v23, v33, s[22:23]
; CHECK-NEXT: v_accvgpr_read_b32 v30, a18
; CHECK-NEXT: v_accvgpr_read_b32 v29, a17
; CHECK-NEXT: v_accvgpr_read_b32 v28, a16
; CHECK-NEXT: v_cndmask_b32_e64 v54, v22, v34, s[22:23]
; CHECK-NEXT: v_accvgpr_read_b32 v20, a16
; CHECK-NEXT: v_accvgpr_read_b32 v16, a48
; CHECK-NEXT: v_accvgpr_read_b32 v33, a19
; CHECK-NEXT: v_accvgpr_read_b32 v19, a51
; CHECK-NEXT: v_accvgpr_read_b32 v34, a18
; CHECK-NEXT: v_accvgpr_read_b32 v1, a1
; CHECK-NEXT: v_accvgpr_read_b32 v2, a2
; CHECK-NEXT: v_accvgpr_read_b32 v3, a3
; CHECK-NEXT: v_accvgpr_read_b32 v4, a4
; CHECK-NEXT: v_accvgpr_read_b32 v5, a5
; CHECK-NEXT: v_accvgpr_read_b32 v6, a6
; CHECK-NEXT: v_accvgpr_read_b32 v7, a7
; CHECK-NEXT: v_accvgpr_read_b32 v8, a8
; CHECK-NEXT: v_accvgpr_read_b32 v9, a9
; CHECK-NEXT: v_accvgpr_read_b32 v10, a10
; CHECK-NEXT: v_accvgpr_read_b32 v11, a11
; CHECK-NEXT: v_accvgpr_read_b32 v12, a12
; CHECK-NEXT: v_accvgpr_read_b32 v13, a13
; CHECK-NEXT: v_accvgpr_read_b32 v14, a14
; CHECK-NEXT: v_accvgpr_read_b32 v15, a15
; CHECK-NEXT: v_cndmask_b32_e64 v41, v17, v29, s[24:25]
; CHECK-NEXT: v_cndmask_b32_e64 v40, v16, v20, s[24:25]
; CHECK-NEXT: v_cndmask_b32_e64 v43, v19, v33, s[18:19]
; CHECK-NEXT: v_cndmask_b32_e64 v42, v18, v34, s[18:19]
; CHECK-NEXT: v_accvgpr_read_b32 v31, a15
; CHECK-NEXT: v_accvgpr_read_b32 v0, a32
; CHECK-NEXT: v_accvgpr_read_b32 v30, a14
; CHECK-NEXT: v_accvgpr_read_b32 v12, a44
; CHECK-NEXT: v_accvgpr_read_b32 v13, a45
; CHECK-NEXT: v_accvgpr_read_b32 v29, a13
; CHECK-NEXT: v_accvgpr_read_b32 v28, a12
; CHECK-NEXT: v_accvgpr_read_b32 v27, a11
; CHECK-NEXT: v_accvgpr_read_b32 v26, a10
; CHECK-NEXT: v_accvgpr_read_b32 v25, a9
; CHECK-NEXT: v_accvgpr_read_b32 v24, a8
; CHECK-NEXT: v_accvgpr_read_b32 v23, a7
; CHECK-NEXT: v_accvgpr_read_b32 v22, a6
; CHECK-NEXT: v_accvgpr_read_b32 v21, a5
; CHECK-NEXT: v_accvgpr_read_b32 v20, a4
; CHECK-NEXT: v_accvgpr_read_b32 v17, a1
; CHECK-NEXT: v_accvgpr_read_b32 v16, a0
; CHECK-NEXT: v_accvgpr_read_b32 v30, a12
; CHECK-NEXT: v_cndmask_b32_e64 v17, v13, v29, s[20:21]
; CHECK-NEXT: v_cndmask_b32_e64 v16, v12, v30, s[20:21]
; CHECK-NEXT: v_accvgpr_read_b32 v31, a11
; CHECK-NEXT: v_accvgpr_read_b32 v32, a18
; CHECK-NEXT: v_accvgpr_read_b32 v9, a41
; CHECK-NEXT: v_accvgpr_read_b32 v30, a10
; CHECK-NEXT: v_accvgpr_read_b32 v29, a9
; CHECK-NEXT: v_accvgpr_read_b32 v28, a8
; CHECK-NEXT: v_accvgpr_read_b32 v27, a7
; CHECK-NEXT: v_accvgpr_read_b32 v26, a6
; CHECK-NEXT: v_accvgpr_read_b32 v25, a5
; CHECK-NEXT: v_accvgpr_read_b32 v24, a4
; CHECK-NEXT: v_accvgpr_read_b32 v23, a3
; CHECK-NEXT: v_accvgpr_read_b32 v22, a2
; CHECK-NEXT: v_accvgpr_read_b32 v21, a1
; CHECK-NEXT: v_accvgpr_read_b32 v33, a15
; CHECK-NEXT: v_cndmask_b32_e64 v21, v9, v29, s[16:17]
; CHECK-NEXT: v_accvgpr_read_b32 v30, a8
; CHECK-NEXT: v_accvgpr_read_b32 v8, a40
; CHECK-NEXT: v_accvgpr_read_b32 v15, a47
; CHECK-NEXT: v_accvgpr_read_b32 v19, a3
; CHECK-NEXT: v_accvgpr_read_b32 v32, a14
; CHECK-NEXT: v_accvgpr_read_b32 v20, a0
; CHECK-NEXT: v_accvgpr_read_b32 v29, a7
; CHECK-NEXT: v_accvgpr_read_b32 v28, a6
; CHECK-NEXT: v_accvgpr_read_b32 v27, a5
; CHECK-NEXT: v_accvgpr_read_b32 v26, a4
; CHECK-NEXT: v_accvgpr_read_b32 v25, a3
; CHECK-NEXT: v_accvgpr_read_b32 v24, a2
; CHECK-NEXT: v_cndmask_b32_e64 v19, v15, v33, s[14:15]
; CHECK-NEXT: v_accvgpr_read_b32 v33, a11
; CHECK-NEXT: v_cndmask_b32_e64 v20, v8, v30, s[16:17]
; CHECK-NEXT: v_accvgpr_read_b32 v31, a7
; CHECK-NEXT: v_accvgpr_read_b32 v5, a37
; CHECK-NEXT: v_accvgpr_read_b32 v11, a43
; CHECK-NEXT: v_accvgpr_read_b32 v23, a1
; CHECK-NEXT: v_accvgpr_read_b32 v32, a10
; CHECK-NEXT: v_accvgpr_read_b32 v30, a6
; CHECK-NEXT: v_accvgpr_read_b32 v29, a5
; CHECK-NEXT: v_accvgpr_read_b32 v28, a4
; CHECK-NEXT: v_accvgpr_read_b32 v27, a3
; CHECK-NEXT: v_accvgpr_read_b32 v26, a2
; CHECK-NEXT: v_accvgpr_read_b32 v25, a1
; CHECK-NEXT: v_cndmask_b32_e64 v23, v11, v33, s[10:11]
; CHECK-NEXT: v_cndmask_b32_e64 v25, v5, v29, s[12:13]
; CHECK-NEXT: v_accvgpr_read_b32 v30, a4
; CHECK-NEXT: v_accvgpr_read_b32 v33, a7
; CHECK-NEXT: v_accvgpr_read_b32 v4, a36
; CHECK-NEXT: v_accvgpr_read_b32 v7, a39
; CHECK-NEXT: v_accvgpr_read_b32 v24, a0
; CHECK-NEXT: v_accvgpr_read_b32 v27, a1
; CHECK-NEXT: v_accvgpr_read_b32 v32, a6
; CHECK-NEXT: v_cndmask_b32_e64 v24, v4, v30, s[12:13]
; CHECK-NEXT: v_cndmask_b32_e64 v27, v7, v33, s[6:7]
; CHECK-NEXT: v_accvgpr_read_b32 v33, a3
; CHECK-NEXT: v_accvgpr_read_b32 v29, a3
; CHECK-NEXT: v_accvgpr_read_b32 v28, a2
; CHECK-NEXT: v_accvgpr_read_b32 v30, a0
; CHECK-NEXT: v_accvgpr_read_b32 v30, a2
; CHECK-NEXT: v_accvgpr_read_b32 v29, a1
; CHECK-NEXT: v_accvgpr_read_b32 v28, a0
; CHECK-NEXT: v_accvgpr_read_b32 v29, a1
; CHECK-NEXT: v_accvgpr_read_b32 v28, a0
; CHECK-NEXT: v_accvgpr_read_b32 v1, a33
; CHECK-NEXT: v_accvgpr_read_b32 v28, a0
; CHECK-NEXT: v_cndmask_b32_e64 v29, v1, v29, s[4:5]
; CHECK-NEXT: v_cndmask_b32_e64 v28, v0, v28, s[4:5]
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
; CHECK-NEXT: v_accvgpr_read_b32 v14, a46
; CHECK-NEXT: v_accvgpr_read_b32 v18, a2
; CHECK-NEXT: v_accvgpr_read_b32 v34, a14
; CHECK-NEXT: v_accvgpr_read_b32 v2, a34
; CHECK-NEXT: v_accvgpr_read_b32 v3, a35
; CHECK-NEXT: v_accvgpr_read_b32 v10, a42
; CHECK-NEXT: v_cndmask_b32_e64 v18, v14, v34, s[14:15]
; CHECK-NEXT: v_accvgpr_read_b32 v22, a0
; CHECK-NEXT: v_accvgpr_read_b32 v34, a10
; CHECK-NEXT: v_accvgpr_read_b32 v31, a1
; CHECK-NEXT: v_accvgpr_read_b32 v6, a38
; CHECK-NEXT: v_cndmask_b32_e64 v22, v10, v34, s[10:11]
; CHECK-NEXT: v_accvgpr_read_b32 v26, a0
; CHECK-NEXT: v_accvgpr_read_b32 v34, a6
; CHECK-NEXT: v_cndmask_b32_e64 v31, v3, v33, s[8:9]
; CHECK-NEXT: v_cndmask_b32_e64 v30, v2, v30, s[8:9]
; CHECK-NEXT: v_cndmask_b32_e64 v26, v6, v34, s[6:7]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[28:31]
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[24:27] offset:16
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[20:23] offset:32
; CHECK-NEXT: v_accvgpr_read_b32 v35, a37
; CHECK-NEXT: v_add_co_u32_e32 v20, vcc, 48, v0
; CHECK-NEXT: v_addc_co_u32_e32 v21, vcc, 0, v1, vcc
; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, 0x70, v0
; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; CHECK-NEXT: v_accvgpr_read_b32 v44, a46
; CHECK-NEXT: v_accvgpr_read_b32 v45, a47
; CHECK-NEXT: v_accvgpr_read_b32 v46, a48
; CHECK-NEXT: v_accvgpr_read_b32 v47, a49
; CHECK-NEXT: v_accvgpr_read_b32 v56, a58
; CHECK-NEXT: v_accvgpr_read_b32 v57, a59
; CHECK-NEXT: v_accvgpr_read_b32 v58, a60
; CHECK-NEXT: v_accvgpr_read_b32 v60, a62
; CHECK-NEXT: v_accvgpr_read_b32 v61, a63
; CHECK-NEXT: v_accvgpr_read_b32 v32, a2
; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[16:19]
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[40:43] offset:64
; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[52:55] offset:32
; CHECK-NEXT: flat_store_dwordx4 v[20:21], v[48:51] offset:48
; CHECK-NEXT: flat_store_dwordx4 v[4:5], v[36:39]
; CHECK-NEXT: .LBB0_49: ; %UnifiedReturnBlock
; CHECK-NEXT: s_or_b64 exec, exec, s[56:57]
; CHECK-NEXT: buffer_load_dword a63, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a62, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a61, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a60, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a59, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a58, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a57, off, s[0:3], s32 offset:820 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a56, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a55, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a54, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a53, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a52, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a51, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a50, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a49, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a48, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a47, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a46, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a45, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a44, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a43, off, s[0:3], s32 offset:876 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a42, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a41, off, s[0:3], s32 offset:884 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a40, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a39, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a38, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a37, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a36, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a35, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a34, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a33, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:968 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s30, v63, 14
; CHECK-NEXT: v_readlane_b32 s31, v63, 15
; CHECK-NEXT: v_readlane_b32 s55, v63, 13
; CHECK-NEXT: v_readlane_b32 s54, v63, 12
; CHECK-NEXT: v_readlane_b32 s53, v63, 11
; CHECK-NEXT: v_readlane_b32 s52, v63, 10
; CHECK-NEXT: v_readlane_b32 s51, v63, 9
; CHECK-NEXT: v_readlane_b32 s50, v63, 8
; CHECK-NEXT: v_readlane_b32 s49, v63, 7
; CHECK-NEXT: v_readlane_b32 s48, v63, 6
; CHECK-NEXT: v_readlane_b32 s39, v63, 5
; CHECK-NEXT: v_readlane_b32 s38, v63, 4
; CHECK-NEXT: v_readlane_b32 s37, v63, 3
; CHECK-NEXT: v_readlane_b32 s36, v63, 2
; CHECK-NEXT: v_readlane_b32 s35, v63, 1
; CHECK-NEXT: v_readlane_b32 s34, v63, 0
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:1632 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
<16 x double> %da, <16 x double> %db,
<16 x i63> %v, <16 x i63> %w, <16 x i63> %a,
ptr %sink, i32 %disc, i1 %c) #0 {
entry:
switch i32 %disc, label %bb0 [
i32 2, label %bb1
i32 3, label %bb2
]
bb0:
br i1 %c, label %bb0a, label %bb0b
bb0a:
store <16 x i63> %a, ptr %sink
ret void
bb0b:
%ld = call <16 x i63> @llvm.masked.load.v16i63.p0(ptr %p, <16 x i1> %m, <16 x i63> %pt)
store <16 x i63> %ld, ptr %sink
ret void
bb1:
br i1 %c, label %bb1a, label %bb1b
bb1a:
%sel = select <16 x i1> %sc, <16 x double> %da, <16 x double> %db
store <16 x double> %sel, ptr %sink
ret void
bb1b:
store <16 x i63> %v, ptr %sink
ret void
bb2:
%x = xor <16 x i63> %a, %w
store <16 x i63> %x, ptr %sink
ret void
}
; Attribute group #0, referenced by the definition of @f above: marks the
; function as nounwind.
attributes #0 = { nounwind }