| # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s |
| |
| --- | |
| define amdgpu_kernel void @largeInterleave() #0 { ret void } |
| ; GCN-LABEL: largeInterleave: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: ; implicit-def: $vgpr16 |
| ; GCN-NEXT: ; implicit-def: $vgpr25 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_readfirstlane_b32 s17, v16 |
| ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 |
| ; GCN-NEXT: ; implicit-def: $vgpr17 |
| ; GCN-NEXT: ; implicit-def: $sgpr15 |
| ; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11 |
| ; GCN-NEXT: s_lshl_b32 s18, s17, 7 |
| ; GCN-NEXT: ; implicit-def: $vgpr18 |
| ; GCN-NEXT: v_add_lshl_u32 v230, v18, s18, 1 |
| ; GCN-NEXT: v_lshl_add_u32 v25, s17, 4, v25 |
| ; GCN-NEXT: v_mul_lo_u32 v25, v25, s6 |
| ; GCN-NEXT: v_add_lshl_u32 v226, v25, v17, 1 |
| ; GCN-NEXT: v_add_u32_e32 v17, s15, v226 |
| ; GCN-NEXT: buffer_load_dwordx4 v[64:67], v226, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx4 v[68:71], v17, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_add_u32_e32 v72, 64, v17 |
| ; GCN-NEXT: ; implicit-def: $vgpr213 |
| ; GCN-NEXT: ; implicit-def: $vgpr152_vgpr153_vgpr154_vgpr155 |
| ; GCN-NEXT: ; implicit-def: $vgpr246 |
| ; GCN-NEXT: v_add_u32_e32 v188, 0x80, v17 |
| ; GCN-NEXT: ; implicit-def: $vgpr156_vgpr157_vgpr158_vgpr159 |
| ; GCN-NEXT: ; implicit-def: $vgpr144_vgpr145_vgpr146_vgpr147 |
| ; GCN-NEXT: ; implicit-def: $vgpr19 |
| ; GCN-NEXT: ; implicit-def: $vgpr26 |
| ; GCN-NEXT: ; implicit-def: $vgpr27 |
| ; GCN-NEXT: v_add_u32_e32 v227, 0xc0, v17 |
| ; GCN-NEXT: v_add_u32_e32 v231, v19, v26 |
| ; GCN-NEXT: v_add_u32_e32 v232, v19, v27 |
| ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-NEXT: ; implicit-def: $vgpr28 |
| ; GCN-NEXT: ; implicit-def: $vgpr29 |
| ; GCN-NEXT: v_add_u32_e32 v233, v19, v28 |
| ; GCN-NEXT: v_add_u32_e32 v234, v19, v29 |
| ; GCN-NEXT: ; implicit-def: $vgpr140_vgpr141_vgpr142_vgpr143 |
| ; GCN-NEXT: ; implicit-def: $sgpr5 |
| ; GCN-NEXT: ; implicit-def: $sgpr7 |
| ; GCN-NEXT: ; implicit-def: $vgpr148_vgpr149_vgpr150_vgpr151 |
| ; GCN-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139 |
| ; GCN-NEXT: ; implicit-def: $vgpr132_vgpr133_vgpr134_vgpr135 |
| ; GCN-NEXT: ; implicit-def: $vgpr20 |
| ; GCN-NEXT: v_add_u32_e32 v18, s17, v20 |
| ; GCN-NEXT: v_and_b32_e32 v18, 0x1fffffff, v18 |
| ; GCN-NEXT: ; implicit-def: $sgpr16 |
| ; GCN-NEXT: v_mul_lo_u32 v18, v18, s16 |
| ; GCN-NEXT: ; implicit-def: $vgpr21 |
| ; GCN-NEXT: v_add_lshl_u32 v199, v21, v18, 1 |
| ; GCN-NEXT: ; implicit-def: $vgpr22 |
| ; GCN-NEXT: v_lshl_add_u32 v200, v22, 1, v199 |
| ; GCN-NEXT: ; implicit-def: $vgpr23 |
| ; GCN-NEXT: v_lshl_add_u32 v201, v23, 1, v200 |
| ; GCN-NEXT: ; implicit-def: $vgpr24 |
| ; GCN-NEXT: v_lshl_add_u32 v202, v24, 1, v201 |
| ; GCN-NEXT: ; implicit-def: $vgpr16 |
| ; GCN-NEXT: ; implicit-def: $vgpr18 |
| ; GCN-NEXT: ; implicit-def: $vgpr20 |
| ; GCN-NEXT: ; implicit-def: $vgpr24 |
| ; GCN-NEXT: v_add_u32_e32 v247, v19, v24 |
| ; GCN-NEXT: v_add_u32_e32 v248, v19, v16 |
| ; GCN-NEXT: v_add_u32_e32 v249, v19, v18 |
| ; GCN-NEXT: v_add_u32_e32 v250, v19, v20 |
| ; GCN-NEXT: ; implicit-def: $vgpr128_vgpr129_vgpr130_vgpr131 |
| ; GCN-NEXT: ; implicit-def: $sgpr14 |
| ; GCN-NEXT: ; implicit-def: $vgpr196 |
| ; GCN-NEXT: ; implicit-def: $sgpr12_sgpr13 |
| ; GCN-NEXT: ; implicit-def: $vgpr211 |
| ; GCN-NEXT: v_max_f32_e32 v212, v211, v211 |
| ; GCN-NEXT: ; implicit-def: $vgpr198 |
| ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 |
| ; GCN-NEXT: ; implicit-def: $vgpr32 |
| ; GCN-NEXT: ; implicit-def: $vgpr33 |
| ; GCN-NEXT: ; implicit-def: $vgpr34 |
| ; GCN-NEXT: v_add_u32_e32 v210, v19, v34 |
| ; GCN-NEXT: v_add_u32_e32 v206, v19, v33 |
| ; GCN-NEXT: v_add_u32_e32 v205, v19, v32 |
| ; GCN-NEXT: ; implicit-def: $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 |
| ; GCN-NEXT: ; implicit-def: $vgpr21 |
| ; GCN-NEXT: ; implicit-def: $vgpr22 |
| ; GCN-NEXT: ; implicit-def: $vgpr23 |
| ; GCN-NEXT: ; implicit-def: $vgpr30 |
| ; GCN-NEXT: ; implicit-def: $vgpr31 |
| ; GCN-NEXT: v_add_u32_e32 v207, v19, v21 |
| ; GCN-NEXT: v_add_u32_e32 v208, v19, v22 |
| ; GCN-NEXT: v_add_u32_e32 v209, v19, v23 |
| ; GCN-NEXT: v_add_u32_e32 v203, v19, v30 |
| ; GCN-NEXT: v_add_u32_e32 v204, v19, v31 |
| ; GCN-NEXT: ; kill: killed $vgpr17 |
| ; GCN-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| ; GCN-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 |
| ; GCN-NEXT: ; implicit-def: $vgpr197 |
| ; GCN-NEXT: ; iglp_opt mask(0x00000002) |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[64:67] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[68:71] offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx4 v[160:163], v226, s[8:11], 0 offen offset:64 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx4 v[164:167], v72, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ds_read_b128 v[64:67], v213 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[64:65], v[152:153], 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[66:67], v[154:155], v[112:127] |
| ; GCN-NEXT: ds_read_b128 v[64:67], v213 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[64:65], v[152:153], 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[66:67], v[154:155], v[96:111] |
| ; GCN-NEXT: ds_read_b128 v[64:67], v213 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[168:171], v213 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[172:175], v246 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[176:179], v246 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[180:183], v246 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[184:187], v246 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[64:65], v[152:153], 0 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[160:163] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[66:67], v[154:155], v[80:95] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[164:167] offset:1024 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[168:169], v[152:153], 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[170:171], v[154:155], v[64:79] |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx4 v[152:155], v226, s[8:11], 0 offen offset:128 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx4 v[160:163], v188, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ds_read_b128 v[188:191], v213 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[192:195], v213 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[164:167], v213 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[214:217], v213 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[172:173], v[156:157], v[112:127] |
| ; GCN-NEXT: ds_read_b128 v[218:221], v246 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[222:225], v246 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[168:171], v246 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[174:175], v[158:159], v[112:127] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[188:189], v[144:145], v[112:127] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[190:191], v[146:147], v[112:127] |
| ; GCN-NEXT: ds_read_b128 v[188:191], v246 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[152:155] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[160:163] offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx4 v[152:155], v226, s[8:11], 0 offen offset:192 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[184:185], v[156:157], v[64:79] |
| ; GCN-NEXT: buffer_load_dwordx4 v[226:229], v227, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[160:161], v231, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[162:163], v232, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[172:173], v233, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[174:175], v234, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[186:187], v[158:159], v[64:79] |
| ; GCN-NEXT: v_perm_b32 v238, v162, v160, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[218:219], v[140:141], v[112:127] |
| ; GCN-NEXT: v_perm_b32 v240, v162, v160, s7 |
| ; GCN-NEXT: v_perm_b32 v242, v163, v161, s5 |
| ; GCN-NEXT: v_perm_b32 v244, v163, v161, s7 |
| ; GCN-NEXT: ds_read_b128 v[160:163], v213 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_perm_b32 v239, v174, v172, s5 |
| ; GCN-NEXT: v_perm_b32 v241, v174, v172, s7 |
| ; GCN-NEXT: v_perm_b32 v243, v175, v173, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[214:215], v[144:145], v[64:79] |
| ; GCN-NEXT: v_perm_b32 v245, v175, v173, s7 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[176:177], v[156:157], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[220:221], v[142:143], v[112:127] |
| ; GCN-NEXT: ds_read_b128 v[218:221], v213 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[172:175], v213 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[216:217], v[146:147], v[64:79] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[178:179], v[158:159], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[160:161], v[148:149], v[112:127] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[188:189], v[140:141], v[64:79] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[192:193], v[144:145], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[162:163], v[150:151], v[112:127] |
| ; GCN-NEXT: ds_read_b128 v[160:163], v213 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[184:187], v246 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[214:217], v246 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[176:179], v246 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[190:191], v[142:143], v[64:79] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[194:195], v[146:147], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[160:161], v[148:149], v[64:79] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[180:181], v[156:157], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[184:185], v[136:137], v[112:127] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[222:223], v[140:141], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[162:163], v[150:151], v[64:79] |
| ; GCN-NEXT: ds_read_b128 v[160:163], v246 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[152:155] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v230, v[226:229] offset:1024 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[182:183], v[158:159], v[80:95] |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[156:159], v213 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[226:229], v213 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[180:183], v213 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[152:155], v213 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[230:233], v246 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[234:237], v246 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[186:187], v[138:139], v[112:127] |
| ; GCN-NEXT: ds_read_b128 v[184:187], v246 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[224:225], v[142:143], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[156:157], v[132:133], v[112:127] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[218:219], v[148:149], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[158:159], v[134:135], v[112:127] |
| ; GCN-NEXT: ds_read_b128 v[156:159], v246 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v199, v[238:239] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v200, v[240:241] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v201, v[242:243] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v202, v[244:245] |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx2 v[192:193], v247, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[220:221], v[150:151], v[96:111] |
| ; GCN-NEXT: buffer_load_dwordx2 v[194:195], v248, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[218:219], v249, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[220:221], v250, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_perm_b32 v188, v194, v192, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[164:165], v[144:145], v[80:95] |
| ; GCN-NEXT: v_perm_b32 v189, v220, v218, s5 |
| ; GCN-NEXT: v_perm_b32 v191, v220, v218, s7 |
| ; GCN-NEXT: v_perm_b32 v190, v194, v192, s7 |
| ; GCN-NEXT: v_perm_b32 v192, v195, v193, s5 |
| ; GCN-NEXT: v_perm_b32 v194, v195, v193, s7 |
| ; GCN-NEXT: v_perm_b32 v193, v221, v219, s5 |
| ; GCN-NEXT: v_perm_b32 v195, v221, v219, s7 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[166:167], v[146:147], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[168:169], v[140:141], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[170:171], v[142:143], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[172:173], v[148:149], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[214:215], v[136:137], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[174:175], v[150:151], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[216:217], v[138:139], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[176:177], v[136:137], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[226:227], v[132:133], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[178:179], v[138:139], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[160:161], v[136:137], v[64:79] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[230:231], v[128:129], v[112:127] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[228:229], v[134:135], v[96:111] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[180:181], v[132:133], v[80:95] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[162:163], v[138:139], v[64:79] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[232:233], v[130:131], v[112:127] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[234:235], v[128:129], v[96:111] |
| ; GCN-NEXT: s_nop 9 |
| ; GCN-NEXT: v_mul_f32_e32 v213, s4, v112 |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v113 |
| ; GCN-NEXT: v_max3_f32 v213, v213, s14, v218 |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v114 |
| ; GCN-NEXT: v_mul_f32_e32 v219, s4, v115 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v218, v219 |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v116 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[182:183], v[134:135], v[80:95] |
| ; GCN-NEXT: v_mul_f32_e32 v219, s4, v117 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v218, v219 |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v118 |
| ; GCN-NEXT: v_mul_f32_e32 v219, s4, v119 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v218, v219 |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v120 |
| ; GCN-NEXT: v_mul_f32_e32 v219, s4, v121 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[152:153], v[132:133], v[64:79] |
| ; GCN-NEXT: v_max3_f32 v213, v213, v218, v219 |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v122 |
| ; GCN-NEXT: v_mul_f32_e32 v219, s4, v123 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v218, v219 |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v124 |
| ; GCN-NEXT: v_mul_f32_e32 v219, s4, v125 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v218, v219 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[236:237], v[130:131], v[96:111] |
| ; GCN-NEXT: v_mul_f32_e32 v218, s4, v126 |
| ; GCN-NEXT: v_mul_f32_e32 v219, s4, v127 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v218, v219 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[184:185], v[128:129], v[80:95] |
| ; GCN-NEXT: s_nop 6 |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v96 |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v97 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v98 |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v99 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v100 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[154:155], v[134:135], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v101 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v102 |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v103 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v104 |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v105 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[186:187], v[130:131], v[80:95] |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v106 |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v107 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v108 |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v109 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[156:157], v[128:129], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v214, s4, v110 |
| ; GCN-NEXT: v_mul_f32_e32 v215, s4, v111 |
| ; GCN-NEXT: v_max3_f32 v213, v213, v214, v215 |
| ; GCN-NEXT: v_mul_f32_e32 v140, s4, v80 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v81 |
| ; GCN-NEXT: v_max3_f32 v140, v213, v140, v141 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v82 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[158:159], v[130:131], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v142, s4, v83 |
| ; GCN-NEXT: v_max3_f32 v140, v140, v141, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v84 |
| ; GCN-NEXT: v_mul_f32_e32 v142, s4, v85 |
| ; GCN-NEXT: v_max3_f32 v140, v140, v141, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v86 |
| ; GCN-NEXT: v_mul_f32_e32 v142, s4, v87 |
| ; GCN-NEXT: v_max3_f32 v140, v140, v141, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v88 |
| ; GCN-NEXT: v_mul_f32_e32 v142, s4, v89 |
| ; GCN-NEXT: v_max3_f32 v140, v140, v141, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v90 |
| ; GCN-NEXT: v_mul_f32_e32 v142, s4, v91 |
| ; GCN-NEXT: v_max3_f32 v140, v140, v141, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v92 |
| ; GCN-NEXT: v_mul_f32_e32 v142, s4, v93 |
| ; GCN-NEXT: v_max3_f32 v140, v140, v141, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v141, s4, v94 |
| ; GCN-NEXT: v_mul_f32_e32 v142, s4, v95 |
| ; GCN-NEXT: v_max3_f32 v140, v140, v141, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v128, s4, v64 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v65 |
| ; GCN-NEXT: v_max3_f32 v128, v140, v128, v129 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v130, s4, v67 |
| ; GCN-NEXT: v_max3_f32 v128, v128, v129, v130 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v68 |
| ; GCN-NEXT: v_mul_f32_e32 v130, s4, v69 |
| ; GCN-NEXT: v_max3_f32 v128, v128, v129, v130 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v70 |
| ; GCN-NEXT: v_mul_f32_e32 v130, s4, v71 |
| ; GCN-NEXT: v_max3_f32 v128, v128, v129, v130 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v72 |
| ; GCN-NEXT: v_mul_f32_e32 v130, s4, v73 |
| ; GCN-NEXT: v_max3_f32 v128, v128, v129, v130 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v74 |
| ; GCN-NEXT: v_mul_f32_e32 v130, s4, v75 |
| ; GCN-NEXT: v_max3_f32 v128, v128, v129, v130 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v76 |
| ; GCN-NEXT: v_mul_f32_e32 v130, s4, v77 |
| ; GCN-NEXT: v_max3_f32 v128, v128, v129, v130 |
| ; GCN-NEXT: v_mul_f32_e32 v129, s4, v78 |
| ; GCN-NEXT: v_mul_f32_e32 v130, s4, v79 |
| ; GCN-NEXT: v_max3_f32 v128, v128, v129, v130 |
| ; GCN-NEXT: ds_bpermute_b32 v129, v196, v128 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[130:133], v198 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[134:137], v198 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_max_f32_e32 v129, v129, v129 |
| ; GCN-NEXT: v_max_f32_e32 v128, v128, v129 |
| ; GCN-NEXT: ds_bpermute_b32 v129, v196, v128 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: v_cndmask_b32_e64 v128, v129, v128, s[12:13] |
| ; GCN-NEXT: v_max_f32_e32 v128, v128, v128 |
| ; GCN-NEXT: v_max_f32_e32 v128, v212, v128 |
| ; GCN-NEXT: v_fma_f32 v113, s4, v113, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v138, 0x3fb8aa3b, v113 |
| ; GCN-NEXT: v_fma_f32 v113, s4, v114, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v139, 0x3fb8aa3b, v113 |
| ; GCN-NEXT: v_fma_f32 v113, s4, v115, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v140, 0x3fb8aa3b, v113 |
| ; GCN-NEXT: v_fma_f32 v113, s4, v116, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v141, 0x3fb8aa3b, v113 |
| ; GCN-NEXT: v_fma_f32 v113, s4, v117, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v142, 0x3fb8aa3b, v113 |
| ; GCN-NEXT: v_fma_f32 v113, s4, v118, -v128 |
| ; GCN-NEXT: v_fma_f32 v112, s4, v112, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v143, 0x3fb8aa3b, v113 |
| ; GCN-NEXT: v_fma_f32 v113, s4, v119, -v128 |
| ; GCN-NEXT: v_fma_f32 v118, s4, v120, -v128 |
| ; GCN-NEXT: v_fma_f32 v120, s4, v121, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v112, 0x3fb8aa3b, v112 |
| ; GCN-NEXT: v_mul_f32_e32 v144, 0x3fb8aa3b, v113 |
| ; GCN-NEXT: v_mul_f32_e32 v149, 0x3fb8aa3b, v120 |
| ; GCN-NEXT: v_fma_f32 v120, s4, v122, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v114, v138 |
| ; GCN-NEXT: v_exp_f32_e32 v115, v139 |
| ; GCN-NEXT: v_exp_f32_e32 v116, v140 |
| ; GCN-NEXT: v_exp_f32_e32 v117, v141 |
| ; GCN-NEXT: v_mul_f32_e32 v148, 0x3fb8aa3b, v118 |
| ; GCN-NEXT: v_exp_f32_e32 v118, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v150, 0x3fb8aa3b, v120 |
| ; GCN-NEXT: v_exp_f32_e32 v120, v144 |
| ; GCN-NEXT: v_exp_f32_e32 v113, v112 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v119, v114 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v121, v116 |
| ; GCN-NEXT: v_sub_f32_e32 v129, v211, v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v112, v113 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v129 |
| ; GCN-NEXT: ds_read_b128 v[138:141], v198 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v122, s4, v123, -v128 |
| ; GCN-NEXT: v_pack_b32_f16 v146, v112, v119 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v112, v115 |
| ; GCN-NEXT: v_mul_f32_e32 v151, 0x3fb8aa3b, v122 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v123, v117 |
| ; GCN-NEXT: v_fma_f32 v122, s4, v124, -v128 |
| ; GCN-NEXT: v_pack_b32_f16 v147, v112, v121 |
| ; GCN-NEXT: v_exp_f32_e32 v112, v129 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v124, v118 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v122 |
| ; GCN-NEXT: v_fma_f32 v125, s4, v125, -v128 |
| ; GCN-NEXT: v_pk_mul_f32 v[0:1], v[0:1], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[2:3], v[2:3], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[4:5], v[4:5], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[6:7], v[6:7], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[8:9], v[8:9], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[10:11], v[10:11], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[12:13], v[12:13], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[14:15], v[14:15], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[32:33], v[32:33], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[34:35], v[34:35], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[130:131], v[146:147], v[0:15] |
| ; GCN-NEXT: v_exp_f32_e32 v119, v143 |
| ; GCN-NEXT: ds_read_b128 v[142:145], v198 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_pk_mul_f32 v[36:37], v[36:37], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[38:39], v[38:39], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[40:41], v[40:41], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[42:43], v[42:43], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[44:45], v[44:45], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[46:47], v[46:47], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[16:17], v[16:17], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[18:19], v[18:19], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[134:135], v[146:147], v[32:47] |
| ; GCN-NEXT: v_mul_f32_e64 v20, v20, v112 |
| ; GCN-NEXT: v_mul_f32_e64 v21, v21, v112 |
| ; GCN-NEXT: v_mul_f32_e64 v22, v22, v112 |
| ; GCN-NEXT: v_mul_f32_e64 v23, v23, v112 |
| ; GCN-NEXT: v_mul_f32_e64 v24, v24, v112 |
| ; GCN-NEXT: v_mul_f32_e64 v25, v25, v112 |
| ; GCN-NEXT: v_pk_mul_f32 v[26:27], v[26:27], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[28:29], v[28:29], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[30:31], v[30:31], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[48:49], v[48:49], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[50:51], v[50:51], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[52:53], v[52:53], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[54:55], v[54:55], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[56:57], v[56:57], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[58:59], v[58:59], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[60:61], v[60:61], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[62:63], v[62:63], v[112:113] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pack_b32_f16 v134, v123, v124 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v130, v119 |
| ; GCN-NEXT: v_fma_f32 v124, s4, v126, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v126, v120 |
| ; GCN-NEXT: v_exp_f32_e32 v121, v148 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[138:139], v[146:147], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v122, v149 |
| ; GCN-NEXT: v_pack_b32_f16 v135, v130, v126 |
| ; GCN-NEXT: v_mul_f32_e32 v138, 0x3fb8aa3b, v124 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v126, v121 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v125 |
| ; GCN-NEXT: v_fma_f32 v139, s4, v96, -v128 |
| ; GCN-NEXT: v_fma_f32 v127, s4, v127, -v128 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[142:143], v[146:147], v[48:63] |
| ; GCN-NEXT: v_exp_f32_e32 v123, v150 |
| ; GCN-NEXT: v_mul_f32_e32 v127, 0x3fb8aa3b, v127 |
| ; GCN-NEXT: v_fma_f32 v143, s4, v101, -v128 |
| ; GCN-NEXT: v_fma_f32 v64, s4, v64, -v128 |
| ; GCN-NEXT: v_fma_f32 v65, s4, v65, -v128 |
| ; GCN-NEXT: v_fma_f32 v68, s4, v68, -v128 |
| ; GCN-NEXT: v_fma_f32 v69, s4, v69, -v128 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[132:133], v[134:135], v[0:15] |
| ; GCN-NEXT: v_exp_f32_e32 v124, v151 |
| ; GCN-NEXT: ds_read_b128 v[130:133], v197 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[146:149], v197 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[136:137], v[134:135], v[32:47] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v136, v122 |
| ; GCN-NEXT: v_exp_f32_e32 v96, v129 |
| ; GCN-NEXT: v_fma_f32 v137, s4, v97, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v139 |
| ; GCN-NEXT: v_pack_b32_f16 v126, v126, v136 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v136, v123 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[140:141], v[134:135], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v97, v125 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v137 |
| ; GCN-NEXT: v_fma_f32 v137, s4, v98, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v142, 0x3fb8aa3b, v137 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[144:145], v[134:135], v[48:63] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v134, v124 |
| ; GCN-NEXT: v_fma_f32 v135, s4, v99, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v98, v138 |
| ; GCN-NEXT: v_exp_f32_e32 v99, v127 |
| ; GCN-NEXT: v_mul_f32_e32 v150, 0x3fb8aa3b, v135 |
| ; GCN-NEXT: v_pack_b32_f16 v127, v136, v134 |
| ; GCN-NEXT: ds_read_b128 v[134:137], v197 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[138:141], v197 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[130:131], v[126:127], v[0:15] |
| ; GCN-NEXT: v_fma_f32 v131, s4, v100, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v130, v96 |
| ; GCN-NEXT: v_exp_f32_e32 v100, v129 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v131, v97 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v199, v[188:189] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v200, v[190:191] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v201, v[192:193] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v202, v[194:195] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[146:147], v[126:127], v[32:47] |
| ; GCN-NEXT: v_exp_f32_e32 v101, v125 |
| ; GCN-NEXT: v_pack_b32_f16 v146, v130, v131 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx2 v[130:131], v210, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v143 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v147, v98 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[134:135], v[126:127], v[16:31] |
| ; GCN-NEXT: v_fma_f32 v134, s4, v102, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v156, 0x3fb8aa3b, v134 |
| ; GCN-NEXT: buffer_load_dwordx2 v[134:135], v207, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v102, v142 |
| ; GCN-NEXT: buffer_load_dwordx2 v[142:143], v208, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[144:145], v209, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[138:139], v[126:127], v[48:63] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v126, v99 |
| ; GCN-NEXT: v_fma_f32 v127, s4, v103, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v103, v150 |
| ; GCN-NEXT: v_fma_f32 v139, s4, v105, -v128 |
| ; GCN-NEXT: v_pack_b32_f16 v147, v147, v126 |
| ; GCN-NEXT: v_mul_f32_e32 v138, 0x3fb8aa3b, v127 |
| ; GCN-NEXT: v_perm_b32 v152, v135, v131, s5 |
| ; GCN-NEXT: v_perm_b32 v154, v135, v131, s7 |
| ; GCN-NEXT: v_fma_f32 v135, s4, v104, -v128 |
| ; GCN-NEXT: v_perm_b32 v126, v134, v130, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[132:133], v[146:147], v[0:15] |
| ; GCN-NEXT: v_perm_b32 v150, v134, v130, s7 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v134, v100 |
| ; GCN-NEXT: v_exp_f32_e32 v104, v129 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v135 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v135, v101 |
| ; GCN-NEXT: ds_read_b128 v[130:133], v198 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_perm_b32 v127, v144, v142, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[148:149], v[146:147], v[32:47] |
| ; GCN-NEXT: v_pack_b32_f16 v148, v134, v135 |
| ; GCN-NEXT: v_fma_f32 v135, s4, v106, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v105, v125 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v134, v102 |
| ; GCN-NEXT: v_perm_b32 v151, v144, v142, s7 |
| ; GCN-NEXT: v_perm_b32 v153, v145, v143, s5 |
| ; GCN-NEXT: v_perm_b32 v155, v145, v143, s7 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[136:137], v[146:147], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v106, v156 |
| ; GCN-NEXT: v_mul_f32_e32 v156, 0x3fb8aa3b, v135 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v135, v103 |
| ; GCN-NEXT: v_fma_f32 v136, s4, v107, -v128 |
| ; GCN-NEXT: ds_read_b128 v[142:145], v198 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v139 |
| ; GCN-NEXT: v_pack_b32_f16 v149, v134, v135 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[140:141], v[146:147], v[48:63] |
| ; GCN-NEXT: v_mul_f32_e32 v146, 0x3fb8aa3b, v136 |
| ; GCN-NEXT: ds_read_b128 v[134:137], v198 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v107, v138 |
| ; GCN-NEXT: ds_read_b128 v[138:141], v198 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[130:131], v[148:149], v[0:15] |
| ; GCN-NEXT: v_fma_f32 v131, s4, v108, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v130, v104 |
| ; GCN-NEXT: v_exp_f32_e32 v108, v129 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v131, v105 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[142:143], v[148:149], v[32:47] |
| ; GCN-NEXT: v_fma_f32 v142, s4, v109, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v109, v125 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v142 |
| ; GCN-NEXT: v_pack_b32_f16 v142, v130, v131 |
| ; GCN-NEXT: v_fma_f32 v131, s4, v110, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v130, v106 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[134:135], v[148:149], v[16:31] |
| ; GCN-NEXT: v_mul_f32_e32 v134, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v131, v107 |
| ; GCN-NEXT: v_exp_f32_e32 v110, v156 |
| ; GCN-NEXT: v_fma_f32 v135, s4, v111, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v135, 0x3fb8aa3b, v135 |
| ; GCN-NEXT: v_pack_b32_f16 v143, v130, v131 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[138:139], v[148:149], v[48:63] |
| ; GCN-NEXT: v_exp_f32_e32 v111, v146 |
| ; GCN-NEXT: v_fma_f32 v139, s4, v80, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v138, v108 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[132:133], v[142:143], v[0:15] |
| ; GCN-NEXT: v_exp_f32_e32 v80, v129 |
| ; GCN-NEXT: ds_read_b128 v[130:133], v197 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[146:149], v197 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v139 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v139, v109 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[144:145], v[142:143], v[32:47] |
| ; GCN-NEXT: v_fma_f32 v144, s4, v81, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v81, v125 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v144 |
| ; GCN-NEXT: v_pack_b32_f16 v144, v138, v139 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[136:137], v[142:143], v[16:31] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v136, v110 |
| ; GCN-NEXT: v_fma_f32 v137, s4, v82, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v82, v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v134, v111 |
| ; GCN-NEXT: v_mul_f32_e32 v156, 0x3fb8aa3b, v137 |
| ; GCN-NEXT: v_fma_f32 v137, s4, v83, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v157, 0x3fb8aa3b, v137 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[140:141], v[142:143], v[48:63] |
| ; GCN-NEXT: v_exp_f32_e32 v83, v135 |
| ; GCN-NEXT: v_pack_b32_f16 v145, v136, v134 |
| ; GCN-NEXT: ds_read_b128 v[134:137], v197 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[138:141], v197 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v199, v[126:127] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v200, v[150:151] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[130:131], v[144:145], v[0:15] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v201, v[152:153] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v202, v[154:155] |
| ; GCN-NEXT: v_fma_f32 v127, s4, v84, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v84, v129 |
| ; GCN-NEXT: v_fma_f32 v130, s4, v85, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v126, v80 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v127 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[146:147], v[144:145], v[32:47] |
| ; GCN-NEXT: v_exp_f32_e32 v85, v125 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v130 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx2 v[130:131], v206, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v127, v81 |
| ; GCN-NEXT: v_pack_b32_f16 v126, v126, v127 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[134:135], v[144:145], v[16:31] |
| ; GCN-NEXT: v_fma_f32 v134, s4, v86, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v158, 0x3fb8aa3b, v134 |
| ; GCN-NEXT: buffer_load_dwordx2 v[134:135], v203, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[142:143], v204, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[146:147], v205, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v127, v82 |
| ; GCN-NEXT: v_exp_f32_e32 v86, v156 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[138:139], v[144:145], v[48:63] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v138, v83 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_fma_f32 v139, s4, v87, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v87, v157 |
| ; GCN-NEXT: v_pack_b32_f16 v127, v127, v138 |
| ; GCN-NEXT: v_fma_f32 v138, s4, v89, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v139, 0x3fb8aa3b, v139 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[132:133], v[126:127], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $sgpr0 |
| ; GCN-NEXT: v_perm_b32 v154, v135, v131, s5 |
| ; GCN-NEXT: v_perm_b32 v156, v135, v131, s7 |
| ; GCN-NEXT: v_fma_f32 v135, s4, v88, -v128 |
| ; GCN-NEXT: v_perm_b32 v150, v134, v130, s5 |
| ; GCN-NEXT: v_perm_b32 v152, v134, v130, s7 |
| ; GCN-NEXT: ds_read_b128 v[130:133], v198 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v134, v84 |
| ; GCN-NEXT: v_exp_f32_e32 v88, v129 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v135 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v135, v85 |
| ; GCN-NEXT: v_perm_b32 v151, v146, v142, s5 |
| ; GCN-NEXT: v_perm_b32 v153, v146, v142, s7 |
| ; GCN-NEXT: v_perm_b32 v155, v147, v143, s5 |
| ; GCN-NEXT: v_perm_b32 v157, v147, v143, s7 |
| ; GCN-NEXT: ds_read_b128 v[142:145], v198 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[148:149], v[126:127], v[32:47] |
| ; GCN-NEXT: v_exp_f32_e32 v89, v125 |
| ; GCN-NEXT: v_pack_b32_f16 v146, v134, v135 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v134, v86 |
| ; GCN-NEXT: v_fma_f32 v135, s4, v90, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v125, 0x3fb8aa3b, v138 |
| ; GCN-NEXT: v_mul_f32_e32 v148, 0x3fb8aa3b, v135 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[136:137], v[126:127], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v90, v158 |
| ; GCN-NEXT: v_mul_f32_e32 v158, 0x3fb8aa3b, v64 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[140:141], v[126:127], v[48:63] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v126, v87 |
| ; GCN-NEXT: v_fma_f32 v127, s4, v91, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v91, v139 |
| ; GCN-NEXT: v_mul_f32_e32 v127, 0x3fb8aa3b, v127 |
| ; GCN-NEXT: v_pack_b32_f16 v147, v134, v126 |
| ; GCN-NEXT: ds_read_b128 v[134:137], v198 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[138:141], v198 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[130:131], v[146:147], v[0:15] |
| ; GCN-NEXT: v_fma_f32 v130, s4, v92, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v126, v88 |
| ; GCN-NEXT: v_exp_f32_e32 v92, v129 |
| ; GCN-NEXT: v_mul_f32_e32 v129, 0x3fb8aa3b, v130 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v130, v89 |
| ; GCN-NEXT: v_fma_f32 v131, s4, v93, -v128 |
| ; GCN-NEXT: v_pack_b32_f16 v130, v126, v130 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[142:143], v[146:147], v[32:47] |
| ; GCN-NEXT: v_exp_f32_e32 v93, v125 |
| ; GCN-NEXT: v_fma_f32 v126, s4, v94, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v125, v90 |
| ; GCN-NEXT: v_mul_f32_e32 v143, 0x3fb8aa3b, v126 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v126, v91 |
| ; GCN-NEXT: v_mul_f32_e32 v142, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_fma_f32 v131, s4, v95, -v128 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[134:135], v[146:147], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v94, v148 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v64, v93 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[138:139], v[146:147], v[48:63] |
| ; GCN-NEXT: v_exp_f32_e32 v95, v127 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v127, v92 |
| ; GCN-NEXT: v_mul_f32_e32 v138, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_pack_b32_f16 v131, v125, v126 |
| ; GCN-NEXT: s_nop 1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[132:133], v[130:131], v[0:15] |
| ; GCN-NEXT: v_exp_f32_e32 v125, v129 |
| ; GCN-NEXT: ds_read_b128 v[132:135], v197 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[146:149], v197 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[144:145], v[130:131], v[32:47] |
| ; GCN-NEXT: v_mul_f32_e32 v144, 0x3fb8aa3b, v65 |
| ; GCN-NEXT: v_fma_f32 v65, s4, v66, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v126, v142 |
| ; GCN-NEXT: v_pack_b32_f16 v142, v127, v64 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v64, v94 |
| ; GCN-NEXT: v_mul_f32_e32 v145, 0x3fb8aa3b, v65 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v65, v95 |
| ; GCN-NEXT: v_fma_f32 v66, s4, v67, -v128 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[136:137], v[130:131], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v127, v143 |
| ; GCN-NEXT: v_pack_b32_f16 v143, v64, v65 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[140:141], v[130:131], v[48:63] |
| ; GCN-NEXT: v_exp_f32_e32 v129, v138 |
| ; GCN-NEXT: v_mul_f32_e32 v141, 0x3fb8aa3b, v66 |
| ; GCN-NEXT: ds_read_b128 v[64:67], v197 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[136:139], v197 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v199, v[150:151] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v200, v[152:153] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[132:133], v[142:143], v[0:15] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v132, v125 |
| ; GCN-NEXT: v_exp_f32_e32 v130, v158 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v201, v[154:155] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v202, v[156:157] |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[146:147], v[142:143], v[32:47] |
| ; GCN-NEXT: v_mul_f32_e32 v146, 0x3fb8aa3b, v68 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v68, v126 |
| ; GCN-NEXT: v_exp_f32_e32 v131, v144 |
| ; GCN-NEXT: v_mul_f32_e32 v144, 0x3fb8aa3b, v69 |
| ; GCN-NEXT: v_fma_f32 v69, s4, v71, -v128 |
| ; GCN-NEXT: v_pack_b32_f16 v140, v132, v68 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v68, v129 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[64:65], v[142:143], v[16:31] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v64, v127 |
| ; GCN-NEXT: v_exp_f32_e32 v132, v145 |
| ; GCN-NEXT: v_fma_f32 v65, s4, v70, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v65, 0x3fb8aa3b, v65 |
| ; GCN-NEXT: v_fma_f32 v145, s4, v73, -v128 |
| ; GCN-NEXT: v_mul_f32_e32 v147, 0x3fb8aa3b, v145 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[136:137], v[142:143], v[48:63] |
| ; GCN-NEXT: v_exp_f32_e32 v133, v141 |
| ; GCN-NEXT: v_mul_f32_e32 v142, 0x3fb8aa3b, v69 |
| ; GCN-NEXT: v_pack_b32_f16 v141, v64, v68 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[68:71], v198 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v143, s4, v72, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v64, v130 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[134:135], v[140:141], v[0:15] |
| ; GCN-NEXT: v_exp_f32_e32 v72, v146 |
| ; GCN-NEXT: v_mul_f32_e32 v146, 0x3fb8aa3b, v143 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v143, v131 |
| ; GCN-NEXT: ds_read_b128 v[134:137], v198 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_pack_b32_f16 v64, v64, v143 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[148:149], v[140:141], v[32:47] |
| ; GCN-NEXT: v_exp_f32_e32 v73, v144 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[66:67], v[140:141], v[16:31] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v66, v132 |
| ; GCN-NEXT: v_fma_f32 v67, s4, v74, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v74, v65 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v65, v133 |
| ; GCN-NEXT: v_mul_f32_e32 v67, 0x3fb8aa3b, v67 |
| ; GCN-NEXT: v_pack_b32_f16 v65, v66, v65 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[138:139], v[140:141], v[48:63] |
| ; GCN-NEXT: v_fma_f32 v138, s4, v75, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v75, v142 |
| ; GCN-NEXT: v_mul_f32_e32 v148, 0x3fb8aa3b, v138 |
| ; GCN-NEXT: ds_read_b128 v[138:141], v198 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[142:145], v198 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v66, v72 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[68:69], v[64:65], v[0:15] |
| ; GCN-NEXT: v_fma_f32 v68, s4, v76, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v76, v146 |
| ; GCN-NEXT: v_mul_f32_e32 v146, 0x3fb8aa3b, v68 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v68, v73 |
| ; GCN-NEXT: v_fma_f32 v69, s4, v77, -v128 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[134:135], v[64:65], v[32:47] |
| ; GCN-NEXT: v_exp_f32_e32 v77, v147 |
| ; GCN-NEXT: v_pack_b32_f16 v134, v66, v68 |
| ; GCN-NEXT: v_fma_f32 v68, s4, v78, -v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v66, v74 |
| ; GCN-NEXT: v_mul_f32_e32 v147, 0x3fb8aa3b, v69 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[138:139], v[64:65], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v78, v67 |
| ; GCN-NEXT: v_mul_f32_e32 v138, 0x3fb8aa3b, v68 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v139, v76 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[142:143], v[64:65], v[48:63] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v64, v75 |
| ; GCN-NEXT: v_fma_f32 v65, s4, v79, -v128 |
| ; GCN-NEXT: v_exp_f32_e32 v79, v148 |
| ; GCN-NEXT: v_mul_f32_e32 v128, 0x3fb8aa3b, v65 |
| ; GCN-NEXT: v_pack_b32_f16 v135, v66, v64 |
| ; GCN-NEXT: s_nop 1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[70:71], v[134:135], v[0:15] |
| ; GCN-NEXT: v_exp_f32_e32 v142, v146 |
| ; GCN-NEXT: ds_read_b128 v[68:71], v197 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[64:67], v197 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[136:137], v[134:135], v[32:47] |
| ; GCN-NEXT: v_exp_f32_e32 v137, v147 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v136, v77 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[140:141], v[134:135], v[16:31] |
| ; GCN-NEXT: v_exp_f32_e32 v138, v138 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v140, v78 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[144:145], v[134:135], v[48:63] |
| ; GCN-NEXT: s_nop 10 |
| ; GCN-NEXT: v_exp_f32_e32 v52, v128 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v50, v137 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v51, v142 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v54, v138 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v53, v52 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v49, v79 |
| ; GCN-NEXT: v_pack_b32_f16 v50, v51, v50 |
| ; GCN-NEXT: v_pack_b32_f16 v48, v139, v136 |
| ; GCN-NEXT: v_pack_b32_f16 v51, v54, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, 0, v113 |
| ; GCN-NEXT: v_add_f32_e32 v53, v114, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v115, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v116, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v117, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v118, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v119, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v120, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v121, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v122, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v123, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v124, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v96, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v97, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v98, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v99, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v100, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v101, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v102, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v103, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v104, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v105, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v106, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v107, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v108, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v109, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v110, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v111, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v80, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v81, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v82, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v83, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v84, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v85, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v86, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v87, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v88, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v89, v53 |
| ; GCN-NEXT: v_pack_b32_f16 v49, v140, v49 |
| ; GCN-NEXT: v_add_f32_e32 v53, v90, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v91, v53 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[68:69], v[48:49], v[0:15] |
| ; GCN-NEXT: v_add_f32_e32 v53, v92, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v93, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v94, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v95, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v125, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v126, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v127, v53 |
| ; GCN-NEXT: v_add_f32_e32 v53, v129, v53 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[70:71], v[50:51], v[0:15] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[64:65], v[48:49], v[32:47] |
| ; GCN-NEXT: s_nop 9 |
| ; GCN-NEXT: v_add_f32_e32 v0, v130, v53 |
| ; GCN-NEXT: v_add_f32_e32 v0, v131, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v132, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v133, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v72, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v73, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v74, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v75, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v76, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v77, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v78, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v79, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v142, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v137, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v138, v0 |
| ; GCN-NEXT: v_add_f32_e32 v4, v52, v0 |
| ; GCN-NEXT: ds_bpermute_b32 v5, v196, v4 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[0:3], v197 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[0:1], v[48:49], v[16:31] |
| ; GCN-NEXT: v_add_f32_e32 v2, v4, v5 |
| ; GCN-NEXT: ds_bpermute_b32 v3, v196, v2 |
| ; GCN-NEXT: ; implicit-def: $vgpr4 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[12:13] |
| ; GCN-NEXT: v_fmac_f32_e32 v0, v4, v112 |
| ; GCN-NEXT: ds_read_b128 v[0:3], v197 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[66:67], v[50:51], v[32:47] |
| ; GCN-NEXT: s_endpgm |
| |
| attributes #0 = {"amdgpu-flat-work-group-size"="256,256"} |
| !0 = !{i64 2862105} |
| |
| ... |
| |
| --- |
| name: largeInterleave |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| stackPtrOffsetReg: '$sgpr32' |
| occupancy: 7 |
| body: | |
| bb.0: |
| liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2, $sgpr3, $sgpr4 |
| %11:vgpr_32 = IMPLICIT_DEF |
| %1:sgpr_512 = IMPLICIT_DEF |
| %16:vgpr_32 = IMPLICIT_DEF |
| %443:sgpr_128 = IMPLICIT_DEF |
| %18:sreg_32 = IMPLICIT_DEF |
| %25:vgpr_32 = IMPLICIT_DEF |
| %23:vgpr_32 = IMPLICIT_DEF |
| %391:vreg_128_align2 = IMPLICIT_DEF |
| %24:vgpr_32 = IMPLICIT_DEF |
| %392:vreg_128_align2 = IMPLICIT_DEF |
| %401:vreg_128_align2 = IMPLICIT_DEF |
| %406:vreg_128_align2 = IMPLICIT_DEF |
| %48:vgpr_32 = IMPLICIT_DEF |
| %473:sgpr_128 = IMPLICIT_DEF |
| %411:vreg_128_align2 = IMPLICIT_DEF |
| %416:vreg_128_align2 = IMPLICIT_DEF |
| %421:vreg_128_align2 = IMPLICIT_DEF |
| %426:vreg_128_align2 = IMPLICIT_DEF |
| %1114:sgpr_32 = IMPLICIT_DEF |
| %39:vgpr_32 = IMPLICIT_DEF |
| %484:sreg_64_xexec = IMPLICIT_DEF |
| %3346:vgpr_32 = IMPLICIT_DEF |
| %1422:sreg_32 = IMPLICIT_DEF |
| %1424:sreg_32 = IMPLICIT_DEF |
| %15:vgpr_32 = IMPLICIT_DEF |
| %494:sreg_32 = IMPLICIT_DEF |
| %47:vgpr_32 = IMPLICIT_DEF |
| %41:vgpr_32 = IMPLICIT_DEF |
| %42:vgpr_32 = IMPLICIT_DEF |
| %43:vgpr_32 = IMPLICIT_DEF |
| %44:vgpr_32 = IMPLICIT_DEF |
| %45:vgpr_32 = IMPLICIT_DEF |
| %50:sreg_32 = IMPLICIT_DEF |
| %3347:vgpr_32 = IMPLICIT_DEF |
| %3329:vgpr_32 = IMPLICIT_DEF |
| %3330:vgpr_32 = IMPLICIT_DEF |
| %3331:vgpr_32 = IMPLICIT_DEF |
| %3332:vgpr_32 = IMPLICIT_DEF |
| %3333:vgpr_32 = IMPLICIT_DEF |
| %2986:vreg_512_align2 = IMPLICIT_DEF |
| %3038:vreg_512_align2 = IMPLICIT_DEF |
| %2980:vreg_512_align2 = IMPLICIT_DEF |
| %3003:vreg_512_align2 = IMPLICIT_DEF |
| %3334:vgpr_32 = IMPLICIT_DEF |
| %3335:vgpr_32 = IMPLICIT_DEF |
| %3336:vgpr_32 = IMPLICIT_DEF |
| %3337:vgpr_32 = IMPLICIT_DEF |
| %3338:vgpr_32 = IMPLICIT_DEF |
| %3339:vgpr_32 = IMPLICIT_DEF |
| %3345:vgpr_32 = IMPLICIT_DEF |
| %3340:vgpr_32 = IMPLICIT_DEF |
| %3341:vgpr_32 = IMPLICIT_DEF |
| %3342:vgpr_32 = IMPLICIT_DEF |
| %3343:vgpr_32 = IMPLICIT_DEF |
| %3344:vgpr_32 = IMPLICIT_DEF |
| %84:vgpr_32 = COPY %3347 |
| %86:vgpr_32 = COPY %3347:vgpr_32 |
| IGLP_OPT 2 |
| %593:sreg_32_xm0 = V_READFIRSTLANE_B32 %11:vgpr_32, implicit $exec |
| %595:vgpr_32 = V_LSHL_ADD_U32_e64 %593:sreg_32_xm0, 4, %3329:vgpr_32, implicit $exec |
| %597:vgpr_32 = nsw V_MUL_LO_U32_e64 %595:vgpr_32, %1.sub6:sgpr_512, implicit $exec |
| %599:vgpr_32 = V_ADD_LSHL_U32_e64 %597:vgpr_32, %16:vgpr_32, 1, implicit $exec |
| %601:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %602:vgpr_32 = V_ADD_U32_e32 %18:sreg_32, %599:vgpr_32, implicit $exec |
| %603:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %602:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %605:sreg_32 = S_LSHL_B32 %593:sreg_32_xm0, 7, implicit-def dead $scc |
| %606:vgpr_32 = V_ADD_LSHL_U32_e64 %25:vgpr_32, %605:sreg_32, 1, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %601:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %603:vreg_128_align2, 1024, 0, implicit $exec |
| %608:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 64, 0, 0, implicit $exec |
| %610:vgpr_32 = V_ADD_U32_e32 64, %602:vgpr_32, implicit $exec |
| %611:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %610:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %612:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| early-clobber %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %612.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %612.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %626:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| early-clobber %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %626.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %626.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %638:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| early-clobber %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %638.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %638.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %650:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| early-clobber %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %650.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %650.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %662:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %662.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %662.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %673:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %673.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %673.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %684:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %684.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %684.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %695:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %695.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %695.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %608:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %611:vreg_128_align2, 1024, 0, implicit $exec |
| %706:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 128, 0, 0, implicit $exec |
| %708:vgpr_32 = V_ADD_U32_e32 128, %602:vgpr_32, implicit $exec |
| %709:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %708:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %710:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %710.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %710.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %721:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %721.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %721.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %732:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %732.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %732.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %743:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %743.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %743.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %754:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %754.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %754.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %765:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %765.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %765.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %776:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %776.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %776.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %787:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %787.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %787.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %706:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %709:vreg_128_align2, 1024, 0, implicit $exec |
| %798:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 192, 0, 0, implicit $exec |
| %800:vgpr_32 = V_ADD_U32_e32 192, %602:vgpr_32, implicit $exec |
| %801:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %800:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %802:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3330:vgpr_32, implicit $exec |
| %803:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %802:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %804:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3331:vgpr_32, implicit $exec |
| %805:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %804:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %806:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3332:vgpr_32, implicit $exec |
| %807:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %806:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %808:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3333:vgpr_32, implicit $exec |
| %809:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %808:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %810:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %810.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %810.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %821:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %821.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %821.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %832:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %832.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %832.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %843:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %843.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %843.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %854:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %854.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %854.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %865:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %865.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %865.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %876:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %876.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %876.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %887:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %887.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %887.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %798:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %801:vreg_128_align2, 1024, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %898:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %898.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %898.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %909:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %909.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %909.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %920:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %920.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %920.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %931:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %931.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %931.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %942:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %942.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %942.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %969:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %969.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %969.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %996:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %996.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %996.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %1023:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1023.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1023.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %1050:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1051:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1052:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1053:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1054:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1055:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1056:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1057:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1058:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1059:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1060:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1061:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1062:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1063:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1064:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1065:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1066:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1067:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1068:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1069:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1070:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1071:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1072:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1073:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1074:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1075:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1076:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1077:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1078:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1079:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1080:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1081:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1082:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1083:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1084:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1085:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1086:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1087:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1088:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1089:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1090:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1091:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1092:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1093:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1094:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1095:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1096:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1097:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1098:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1099:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1100:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1101:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1102:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1103:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1104:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1105:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1106:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1107:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1108:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1109:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1110:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1111:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1112:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1113:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1115:vgpr_32 = V_MAX3_F32_e64 0, %1050:vgpr_32, 0, %1114:sgpr_32, 0, %1051:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1116:vgpr_32 = V_MAX3_F32_e64 0, %1115:vgpr_32, 0, %1052:vgpr_32, 0, %1053:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1117:vgpr_32 = V_MAX3_F32_e64 0, %1116:vgpr_32, 0, %1054:vgpr_32, 0, %1055:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1118:vgpr_32 = V_MAX3_F32_e64 0, %1117:vgpr_32, 0, %1056:vgpr_32, 0, %1057:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1119:vgpr_32 = V_MAX3_F32_e64 0, %1118:vgpr_32, 0, %1058:vgpr_32, 0, %1059:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1120:vgpr_32 = V_MAX3_F32_e64 0, %1119:vgpr_32, 0, %1060:vgpr_32, 0, %1061:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1121:vgpr_32 = V_MAX3_F32_e64 0, %1120:vgpr_32, 0, %1062:vgpr_32, 0, %1063:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1122:vgpr_32 = V_MAX3_F32_e64 0, %1121:vgpr_32, 0, %1064:vgpr_32, 0, %1065:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1123:vgpr_32 = V_MAX3_F32_e64 0, %1122:vgpr_32, 0, %1066:vgpr_32, 0, %1067:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1124:vgpr_32 = V_MAX3_F32_e64 0, %1123:vgpr_32, 0, %1068:vgpr_32, 0, %1069:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1125:vgpr_32 = V_MAX3_F32_e64 0, %1124:vgpr_32, 0, %1070:vgpr_32, 0, %1071:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1126:vgpr_32 = V_MAX3_F32_e64 0, %1125:vgpr_32, 0, %1072:vgpr_32, 0, %1073:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1127:vgpr_32 = V_MAX3_F32_e64 0, %1126:vgpr_32, 0, %1074:vgpr_32, 0, %1075:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1128:vgpr_32 = V_MAX3_F32_e64 0, %1127:vgpr_32, 0, %1076:vgpr_32, 0, %1077:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1129:vgpr_32 = V_MAX3_F32_e64 0, %1128:vgpr_32, 0, %1078:vgpr_32, 0, %1079:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1130:vgpr_32 = V_MAX3_F32_e64 0, %1129:vgpr_32, 0, %1080:vgpr_32, 0, %1081:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1131:vgpr_32 = V_MAX3_F32_e64 0, %1130:vgpr_32, 0, %1082:vgpr_32, 0, %1083:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1132:vgpr_32 = V_MAX3_F32_e64 0, %1131:vgpr_32, 0, %1084:vgpr_32, 0, %1085:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1133:vgpr_32 = V_MAX3_F32_e64 0, %1132:vgpr_32, 0, %1086:vgpr_32, 0, %1087:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1134:vgpr_32 = V_MAX3_F32_e64 0, %1133:vgpr_32, 0, %1088:vgpr_32, 0, %1089:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1135:vgpr_32 = V_MAX3_F32_e64 0, %1134:vgpr_32, 0, %1090:vgpr_32, 0, %1091:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1136:vgpr_32 = V_MAX3_F32_e64 0, %1135:vgpr_32, 0, %1092:vgpr_32, 0, %1093:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1137:vgpr_32 = V_MAX3_F32_e64 0, %1136:vgpr_32, 0, %1094:vgpr_32, 0, %1095:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1138:vgpr_32 = V_MAX3_F32_e64 0, %1137:vgpr_32, 0, %1096:vgpr_32, 0, %1097:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1139:vgpr_32 = V_MAX3_F32_e64 0, %1138:vgpr_32, 0, %1098:vgpr_32, 0, %1099:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1140:vgpr_32 = V_MAX3_F32_e64 0, %1139:vgpr_32, 0, %1100:vgpr_32, 0, %1101:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1141:vgpr_32 = V_MAX3_F32_e64 0, %1140:vgpr_32, 0, %1102:vgpr_32, 0, %1103:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1142:vgpr_32 = V_MAX3_F32_e64 0, %1141:vgpr_32, 0, %1104:vgpr_32, 0, %1105:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1143:vgpr_32 = V_MAX3_F32_e64 0, %1142:vgpr_32, 0, %1106:vgpr_32, 0, %1107:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1144:vgpr_32 = V_MAX3_F32_e64 0, %1143:vgpr_32, 0, %1108:vgpr_32, 0, %1109:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1145:vgpr_32 = V_MAX3_F32_e64 0, %1144:vgpr_32, 0, %1110:vgpr_32, 0, %1111:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1146:vgpr_32 = V_MAX3_F32_e64 0, %1145:vgpr_32, 0, %1112:vgpr_32, 0, %1113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1147:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1146:vgpr_32, 0, implicit $exec |
| %1148:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1147:vgpr_32, %1147:vgpr_32, implicit $mode, implicit $exec |
| %1149:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1146:vgpr_32, %1148:vgpr_32, implicit $mode, implicit $exec |
| %1150:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1149:vgpr_32, 0, implicit $exec |
| %1151:vgpr_32 = V_CNDMASK_B32_e64 0, %1150:vgpr_32, 0, %1149:vgpr_32, %484:sreg_64_xexec, implicit $exec |
| %1153:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1151:vgpr_32, %1151:vgpr_32, implicit $mode, implicit $exec |
| %1154:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %3346:vgpr_32, %3346:vgpr_32, implicit $mode, implicit $exec |
| %151:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1154:vgpr_32, %1153:vgpr_32, implicit $mode, implicit $exec |
| %1155:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1157:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1155:vgpr_32, implicit $mode, implicit $exec |
| %1158:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1157:vgpr_32, implicit $mode, implicit $exec |
| %1159:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1160:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1159:vgpr_32, implicit $mode, implicit $exec |
| %1161:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1160:vgpr_32, implicit $mode, implicit $exec |
| %1162:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1163:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1162:vgpr_32, implicit $mode, implicit $exec |
| %1164:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1163:vgpr_32, implicit $mode, implicit $exec |
| %1165:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1166:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1165:vgpr_32, implicit $mode, implicit $exec |
| %1167:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1166:vgpr_32, implicit $mode, implicit $exec |
| %1168:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1169:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1168:vgpr_32, implicit $mode, implicit $exec |
| %1170:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1169:vgpr_32, implicit $mode, implicit $exec |
| %1171:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1172:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1171:vgpr_32, implicit $mode, implicit $exec |
| %1173:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1172:vgpr_32, implicit $mode, implicit $exec |
| %1174:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1175:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1174:vgpr_32, implicit $mode, implicit $exec |
| %1176:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1175:vgpr_32, implicit $mode, implicit $exec |
| %1177:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1178:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1177:vgpr_32, implicit $mode, implicit $exec |
| %1179:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1178:vgpr_32, implicit $mode, implicit $exec |
| %1180:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1181:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1180:vgpr_32, implicit $mode, implicit $exec |
| %1182:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1181:vgpr_32, implicit $mode, implicit $exec |
| %1183:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1184:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1183:vgpr_32, implicit $mode, implicit $exec |
| %1185:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1184:vgpr_32, implicit $mode, implicit $exec |
| %1186:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1187:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1186:vgpr_32, implicit $mode, implicit $exec |
| %1188:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1187:vgpr_32, implicit $mode, implicit $exec |
| %1189:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1190:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1189:vgpr_32, implicit $mode, implicit $exec |
| %1191:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1190:vgpr_32, implicit $mode, implicit $exec |
| %1192:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1193:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1192:vgpr_32, implicit $mode, implicit $exec |
| %1194:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1193:vgpr_32, implicit $mode, implicit $exec |
| %1195:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1196:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1195:vgpr_32, implicit $mode, implicit $exec |
| %1197:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1196:vgpr_32, implicit $mode, implicit $exec |
| %1198:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1199:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1198:vgpr_32, implicit $mode, implicit $exec |
| %1200:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1199:vgpr_32, implicit $mode, implicit $exec |
| %1201:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1202:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1201:vgpr_32, implicit $mode, implicit $exec |
| %1203:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1202:vgpr_32, implicit $mode, implicit $exec |
| %1204:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1205:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1204:vgpr_32, implicit $mode, implicit $exec |
| %1206:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1205:vgpr_32, implicit $mode, implicit $exec |
| %1207:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1208:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1207:vgpr_32, implicit $mode, implicit $exec |
| %1209:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1208:vgpr_32, implicit $mode, implicit $exec |
| %1210:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1211:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1210:vgpr_32, implicit $mode, implicit $exec |
| %1212:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1211:vgpr_32, implicit $mode, implicit $exec |
| %1213:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1214:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1213:vgpr_32, implicit $mode, implicit $exec |
| %1215:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1214:vgpr_32, implicit $mode, implicit $exec |
| %1216:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1217:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1216:vgpr_32, implicit $mode, implicit $exec |
| %1218:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1217:vgpr_32, implicit $mode, implicit $exec |
| %1219:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1220:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1219:vgpr_32, implicit $mode, implicit $exec |
| %1221:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1220:vgpr_32, implicit $mode, implicit $exec |
| %1222:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1223:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1222:vgpr_32, implicit $mode, implicit $exec |
| %1224:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1223:vgpr_32, implicit $mode, implicit $exec |
| %1225:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1226:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1225:vgpr_32, implicit $mode, implicit $exec |
| %1227:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1226:vgpr_32, implicit $mode, implicit $exec |
| %1228:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1229:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1228:vgpr_32, implicit $mode, implicit $exec |
| %1230:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1229:vgpr_32, implicit $mode, implicit $exec |
| %1231:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1232:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1231:vgpr_32, implicit $mode, implicit $exec |
| %1233:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1232:vgpr_32, implicit $mode, implicit $exec |
| %1234:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1235:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1234:vgpr_32, implicit $mode, implicit $exec |
| %1236:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1235:vgpr_32, implicit $mode, implicit $exec |
| %1237:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1238:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1237:vgpr_32, implicit $mode, implicit $exec |
| %1239:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1238:vgpr_32, implicit $mode, implicit $exec |
| %1240:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1241:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1240:vgpr_32, implicit $mode, implicit $exec |
| %1242:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1241:vgpr_32, implicit $mode, implicit $exec |
| %1243:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1244:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1243:vgpr_32, implicit $mode, implicit $exec |
| %1245:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1244:vgpr_32, implicit $mode, implicit $exec |
| %1246:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1247:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1246:vgpr_32, implicit $mode, implicit $exec |
| %1248:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1247:vgpr_32, implicit $mode, implicit $exec |
| %1249:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1250:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1249:vgpr_32, implicit $mode, implicit $exec |
| %1251:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1250:vgpr_32, implicit $mode, implicit $exec |
| %1252:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1253:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1252:vgpr_32, implicit $mode, implicit $exec |
| %1254:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1253:vgpr_32, implicit $mode, implicit $exec |
| %1255:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1256:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1255:vgpr_32, implicit $mode, implicit $exec |
| %1257:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1256:vgpr_32, implicit $mode, implicit $exec |
| %1258:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1259:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1258:vgpr_32, implicit $mode, implicit $exec |
| %1260:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1259:vgpr_32, implicit $mode, implicit $exec |
| %1261:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1262:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1261:vgpr_32, implicit $mode, implicit $exec |
| %1263:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1262:vgpr_32, implicit $mode, implicit $exec |
| %1264:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1265:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1264:vgpr_32, implicit $mode, implicit $exec |
| %1266:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1265:vgpr_32, implicit $mode, implicit $exec |
| %1267:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1268:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1267:vgpr_32, implicit $mode, implicit $exec |
| %1269:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1268:vgpr_32, implicit $mode, implicit $exec |
| %1270:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1271:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1270:vgpr_32, implicit $mode, implicit $exec |
| %1272:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1271:vgpr_32, implicit $mode, implicit $exec |
| %1273:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1274:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1273:vgpr_32, implicit $mode, implicit $exec |
| %1275:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1274:vgpr_32, implicit $mode, implicit $exec |
| %1276:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1277:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1276:vgpr_32, implicit $mode, implicit $exec |
| %1278:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1277:vgpr_32, implicit $mode, implicit $exec |
| %1279:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1280:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1279:vgpr_32, implicit $mode, implicit $exec |
| %1281:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1280:vgpr_32, implicit $mode, implicit $exec |
| %1282:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1283:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1282:vgpr_32, implicit $mode, implicit $exec |
| %1284:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1283:vgpr_32, implicit $mode, implicit $exec |
| %1285:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1286:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1285:vgpr_32, implicit $mode, implicit $exec |
| %1287:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1286:vgpr_32, implicit $mode, implicit $exec |
| %1288:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1289:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1288:vgpr_32, implicit $mode, implicit $exec |
| %1290:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1289:vgpr_32, implicit $mode, implicit $exec |
| %1291:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1292:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1291:vgpr_32, implicit $mode, implicit $exec |
| %1293:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1292:vgpr_32, implicit $mode, implicit $exec |
| %1294:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1295:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1294:vgpr_32, implicit $mode, implicit $exec |
| %1296:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1295:vgpr_32, implicit $mode, implicit $exec |
| %1297:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1298:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1297:vgpr_32, implicit $mode, implicit $exec |
| %1299:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1298:vgpr_32, implicit $mode, implicit $exec |
| %1300:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1301:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1300:vgpr_32, implicit $mode, implicit $exec |
| %1302:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1301:vgpr_32, implicit $mode, implicit $exec |
| %1303:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1304:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1303:vgpr_32, implicit $mode, implicit $exec |
| %1305:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1304:vgpr_32, implicit $mode, implicit $exec |
| %1306:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1307:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1306:vgpr_32, implicit $mode, implicit $exec |
| %1308:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1307:vgpr_32, implicit $mode, implicit $exec |
| %1309:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1310:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1309:vgpr_32, implicit $mode, implicit $exec |
| %1311:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1310:vgpr_32, implicit $mode, implicit $exec |
| %1312:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1313:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1312:vgpr_32, implicit $mode, implicit $exec |
| %1314:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1313:vgpr_32, implicit $mode, implicit $exec |
| %1315:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1316:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1315:vgpr_32, implicit $mode, implicit $exec |
| %1317:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1316:vgpr_32, implicit $mode, implicit $exec |
| %1318:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1319:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1318:vgpr_32, implicit $mode, implicit $exec |
| %1320:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1319:vgpr_32, implicit $mode, implicit $exec |
| %1321:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1322:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1321:vgpr_32, implicit $mode, implicit $exec |
| %1323:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1322:vgpr_32, implicit $mode, implicit $exec |
| %1324:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1325:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1324:vgpr_32, implicit $mode, implicit $exec |
| %1326:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1325:vgpr_32, implicit $mode, implicit $exec |
| %1327:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1328:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1327:vgpr_32, implicit $mode, implicit $exec |
| %1329:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1328:vgpr_32, implicit $mode, implicit $exec |
| %1330:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1331:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1330:vgpr_32, implicit $mode, implicit $exec |
| %1332:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1331:vgpr_32, implicit $mode, implicit $exec |
| %1333:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1334:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1333:vgpr_32, implicit $mode, implicit $exec |
| %1335:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1334:vgpr_32, implicit $mode, implicit $exec |
| %1336:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1337:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1336:vgpr_32, implicit $mode, implicit $exec |
| %1338:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1337:vgpr_32, implicit $mode, implicit $exec |
| %1339:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1340:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1339:vgpr_32, implicit $mode, implicit $exec |
| %1341:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1340:vgpr_32, implicit $mode, implicit $exec |
| %1342:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1343:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1342:vgpr_32, implicit $mode, implicit $exec |
| %1344:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1343:vgpr_32, implicit $mode, implicit $exec |
| %1345:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1346:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1345:vgpr_32, implicit $mode, implicit $exec |
| %1347:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1346:vgpr_32, implicit $mode, implicit $exec |
| %1348:vgpr_32 = contract nofpexcept V_ADD_F32_e32 0, %1158:vgpr_32, implicit $mode, implicit $exec |
| %1349:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1161:vgpr_32, %1348:vgpr_32, implicit $mode, implicit $exec |
| %1350:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1164:vgpr_32, %1349:vgpr_32, implicit $mode, implicit $exec |
| %1351:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1167:vgpr_32, %1350:vgpr_32, implicit $mode, implicit $exec |
| %1352:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1170:vgpr_32, %1351:vgpr_32, implicit $mode, implicit $exec |
| %1353:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1173:vgpr_32, %1352:vgpr_32, implicit $mode, implicit $exec |
| %1354:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1176:vgpr_32, %1353:vgpr_32, implicit $mode, implicit $exec |
| %1355:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1179:vgpr_32, %1354:vgpr_32, implicit $mode, implicit $exec |
| %1356:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1182:vgpr_32, %1355:vgpr_32, implicit $mode, implicit $exec |
| %1357:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1185:vgpr_32, %1356:vgpr_32, implicit $mode, implicit $exec |
| %1358:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1188:vgpr_32, %1357:vgpr_32, implicit $mode, implicit $exec |
| %1359:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1191:vgpr_32, %1358:vgpr_32, implicit $mode, implicit $exec |
| %1360:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1194:vgpr_32, %1359:vgpr_32, implicit $mode, implicit $exec |
| %1361:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1197:vgpr_32, %1360:vgpr_32, implicit $mode, implicit $exec |
| %1362:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1200:vgpr_32, %1361:vgpr_32, implicit $mode, implicit $exec |
| %1363:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1203:vgpr_32, %1362:vgpr_32, implicit $mode, implicit $exec |
| %1364:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1206:vgpr_32, %1363:vgpr_32, implicit $mode, implicit $exec |
| %1365:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1209:vgpr_32, %1364:vgpr_32, implicit $mode, implicit $exec |
| %1366:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1212:vgpr_32, %1365:vgpr_32, implicit $mode, implicit $exec |
| %1367:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1215:vgpr_32, %1366:vgpr_32, implicit $mode, implicit $exec |
| %1368:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1218:vgpr_32, %1367:vgpr_32, implicit $mode, implicit $exec |
| %1369:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1221:vgpr_32, %1368:vgpr_32, implicit $mode, implicit $exec |
| %1370:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1224:vgpr_32, %1369:vgpr_32, implicit $mode, implicit $exec |
| %1371:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1227:vgpr_32, %1370:vgpr_32, implicit $mode, implicit $exec |
| %1372:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1230:vgpr_32, %1371:vgpr_32, implicit $mode, implicit $exec |
| %1373:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1233:vgpr_32, %1372:vgpr_32, implicit $mode, implicit $exec |
| %1374:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1236:vgpr_32, %1373:vgpr_32, implicit $mode, implicit $exec |
| %1375:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1239:vgpr_32, %1374:vgpr_32, implicit $mode, implicit $exec |
| %1376:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1242:vgpr_32, %1375:vgpr_32, implicit $mode, implicit $exec |
| %1377:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1245:vgpr_32, %1376:vgpr_32, implicit $mode, implicit $exec |
| %1378:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1248:vgpr_32, %1377:vgpr_32, implicit $mode, implicit $exec |
| %1379:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1251:vgpr_32, %1378:vgpr_32, implicit $mode, implicit $exec |
| %1380:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1254:vgpr_32, %1379:vgpr_32, implicit $mode, implicit $exec |
| %1381:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1257:vgpr_32, %1380:vgpr_32, implicit $mode, implicit $exec |
| %1382:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1260:vgpr_32, %1381:vgpr_32, implicit $mode, implicit $exec |
| %1383:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1263:vgpr_32, %1382:vgpr_32, implicit $mode, implicit $exec |
| %1384:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1266:vgpr_32, %1383:vgpr_32, implicit $mode, implicit $exec |
| %1385:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1269:vgpr_32, %1384:vgpr_32, implicit $mode, implicit $exec |
| %1386:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1272:vgpr_32, %1385:vgpr_32, implicit $mode, implicit $exec |
| %1387:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1275:vgpr_32, %1386:vgpr_32, implicit $mode, implicit $exec |
| %1388:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1278:vgpr_32, %1387:vgpr_32, implicit $mode, implicit $exec |
| %1389:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1281:vgpr_32, %1388:vgpr_32, implicit $mode, implicit $exec |
| %1390:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1284:vgpr_32, %1389:vgpr_32, implicit $mode, implicit $exec |
| %1391:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1287:vgpr_32, %1390:vgpr_32, implicit $mode, implicit $exec |
| %1392:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1290:vgpr_32, %1391:vgpr_32, implicit $mode, implicit $exec |
| %1393:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1293:vgpr_32, %1392:vgpr_32, implicit $mode, implicit $exec |
| %1394:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1296:vgpr_32, %1393:vgpr_32, implicit $mode, implicit $exec |
| %1395:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1299:vgpr_32, %1394:vgpr_32, implicit $mode, implicit $exec |
| %1396:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1302:vgpr_32, %1395:vgpr_32, implicit $mode, implicit $exec |
| %1397:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1305:vgpr_32, %1396:vgpr_32, implicit $mode, implicit $exec |
| %1398:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1308:vgpr_32, %1397:vgpr_32, implicit $mode, implicit $exec |
| %1399:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1311:vgpr_32, %1398:vgpr_32, implicit $mode, implicit $exec |
| %1400:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1314:vgpr_32, %1399:vgpr_32, implicit $mode, implicit $exec |
| %1401:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1317:vgpr_32, %1400:vgpr_32, implicit $mode, implicit $exec |
| %1402:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1320:vgpr_32, %1401:vgpr_32, implicit $mode, implicit $exec |
| %1403:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1323:vgpr_32, %1402:vgpr_32, implicit $mode, implicit $exec |
| %1404:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1326:vgpr_32, %1403:vgpr_32, implicit $mode, implicit $exec |
| %1405:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1329:vgpr_32, %1404:vgpr_32, implicit $mode, implicit $exec |
| %1406:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1332:vgpr_32, %1405:vgpr_32, implicit $mode, implicit $exec |
| %1407:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1335:vgpr_32, %1406:vgpr_32, implicit $mode, implicit $exec |
| %1408:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1338:vgpr_32, %1407:vgpr_32, implicit $mode, implicit $exec |
| %1409:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1341:vgpr_32, %1408:vgpr_32, implicit $mode, implicit $exec |
| %1410:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1344:vgpr_32, %1409:vgpr_32, implicit $mode, implicit $exec |
| %1411:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1347:vgpr_32, %1410:vgpr_32, implicit $mode, implicit $exec |
| %1412:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1411:vgpr_32, 0, implicit $exec |
| %1413:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1411:vgpr_32, %1412:vgpr_32, implicit $mode, implicit $exec |
| %1414:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1413:vgpr_32, 0, implicit $exec |
| %3347:vgpr_32 = V_CNDMASK_B32_e64 0, %1414:vgpr_32, 0, %1413:vgpr_32, %484:sreg_64_xexec, implicit $exec |
| %1417:vgpr_32 = contract nofpexcept V_SUB_F32_e32 %3346:vgpr_32, %151:vgpr_32, implicit $mode, implicit $exec |
| %1418:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1417:vgpr_32, implicit $mode, implicit $exec |
| undef %1455.sub0:vreg_64_align2 = afn nofpexcept V_EXP_F32_e32 %1418:vgpr_32, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3037.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub0:vreg_64_align2, %803.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3021.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub0:vreg_64_align2, %803.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3037.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub0:vreg_64_align2, %807.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3021.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub0:vreg_64_align2, %807.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3005.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub1:vreg_64_align2, %803.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %2978.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub1:vreg_64_align2, %803.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3005.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub1:vreg_64_align2, %807.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %2978.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub1:vreg_64_align2, %807.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %1442:vgpr_32 = V_ADD_U32_e32 %593:sreg_32_xm0, %15:vgpr_32, implicit $exec |
| %1444:vgpr_32 = V_AND_B32_e32 536870911, %1442:vgpr_32, implicit $exec |
| %1446:vgpr_32 = nsw V_MUL_LO_U32_e64 %1444:vgpr_32, %494:sreg_32, implicit $exec |
| %1447:vgpr_32 = V_ADD_LSHL_U32_e64 %47:vgpr_32, %1446:vgpr_32, 1, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3037:vreg_64_align2, 0, 0, implicit $exec |
| %1449:vgpr_32 = V_LSHL_ADD_U32_e64 %41:vgpr_32, 1, %1447:vgpr_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3021:vreg_64_align2, 0, 0, implicit $exec |
| %1451:vgpr_32 = V_LSHL_ADD_U32_e64 %42:vgpr_32, 1, %1449:vgpr_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3005:vreg_64_align2, 0, 0, implicit $exec |
| %1453:vgpr_32 = V_LSHL_ADD_U32_e64 %43:vgpr_32, 1, %1451:vgpr_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %2978:vreg_64_align2, 0, 0, implicit $exec |
| %3347:vgpr_32 = contract nofpexcept V_FMAC_F32_e32 %86:vgpr_32, %1455.sub0:vreg_64_align2, %3347:vgpr_32, implicit $mode, implicit $exec |
| %2986.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %1554:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1158:vgpr_32, implicit $mode, implicit $exec |
| %1555:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1161:vgpr_32, implicit $mode, implicit $exec |
| %1556:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1164:vgpr_32, implicit $mode, implicit $exec |
| %1557:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1170:vgpr_32, implicit $mode, implicit $exec |
| %1558:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1173:vgpr_32, implicit $mode, implicit $exec |
| %1559:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1176:vgpr_32, implicit $mode, implicit $exec |
| %1560:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1182:vgpr_32, implicit $mode, implicit $exec |
| %1561:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1185:vgpr_32, implicit $mode, implicit $exec |
| %1562:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1188:vgpr_32, implicit $mode, implicit $exec |
| %1563:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1194:vgpr_32, implicit $mode, implicit $exec |
| %1564:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1197:vgpr_32, implicit $mode, implicit $exec |
| %1565:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1200:vgpr_32, implicit $mode, implicit $exec |
| %1566:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1206:vgpr_32, implicit $mode, implicit $exec |
| %1567:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1209:vgpr_32, implicit $mode, implicit $exec |
| %1568:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1212:vgpr_32, implicit $mode, implicit $exec |
| %1569:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1218:vgpr_32, implicit $mode, implicit $exec |
| %1570:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1221:vgpr_32, implicit $mode, implicit $exec |
| %1571:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1224:vgpr_32, implicit $mode, implicit $exec |
| %1572:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1230:vgpr_32, implicit $mode, implicit $exec |
| %1573:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1233:vgpr_32, implicit $mode, implicit $exec |
| %1574:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1236:vgpr_32, implicit $mode, implicit $exec |
| %1575:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1242:vgpr_32, implicit $mode, implicit $exec |
| %1576:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1245:vgpr_32, implicit $mode, implicit $exec |
| %1577:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1248:vgpr_32, implicit $mode, implicit $exec |
| %1578:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1254:vgpr_32, implicit $mode, implicit $exec |
| %1579:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1257:vgpr_32, implicit $mode, implicit $exec |
| %1580:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1260:vgpr_32, implicit $mode, implicit $exec |
| %1581:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1266:vgpr_32, implicit $mode, implicit $exec |
| %1582:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1269:vgpr_32, implicit $mode, implicit $exec |
| %1583:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1272:vgpr_32, implicit $mode, implicit $exec |
| %1584:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1278:vgpr_32, implicit $mode, implicit $exec |
| %1585:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1281:vgpr_32, implicit $mode, implicit $exec |
| %1586:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1284:vgpr_32, implicit $mode, implicit $exec |
| %1587:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1290:vgpr_32, implicit $mode, implicit $exec |
| %1588:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1293:vgpr_32, implicit $mode, implicit $exec |
| %1589:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1296:vgpr_32, implicit $mode, implicit $exec |
| %1590:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3345:vgpr_32, implicit $exec |
| %1591:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1590:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1592:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3334:vgpr_32, implicit $exec |
| %1593:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1592:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1594:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3335:vgpr_32, implicit $exec |
| %1595:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1594:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1596:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3336:vgpr_32, implicit $exec |
| %1597:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1596:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %1598:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %1605:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %1612:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %1619:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %1626:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %1633:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %1640:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %1647:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3161.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub0:vreg_64_align2, %1591.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3145.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub0:vreg_64_align2, %1591.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3161.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub0:vreg_64_align2, %1595.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3145.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub0:vreg_64_align2, %1595.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3129.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub1:vreg_64_align2, %1591.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3113.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub1:vreg_64_align2, %1591.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3129.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub1:vreg_64_align2, %1595.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3113.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub1:vreg_64_align2, %1595.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3161:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3145:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3129:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %3113:vreg_64_align2, 0, 0, implicit $exec |
| %1678:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3344:vgpr_32, implicit $exec |
| %1679:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1678:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1680:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3337:vgpr_32, implicit $exec |
| %1681:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1680:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1682:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3338:vgpr_32, implicit $exec |
| %1683:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1682:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1684:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3339:vgpr_32, implicit $exec |
| %1685:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1684:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %1686:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %1693:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %1700:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %1707:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %1714:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %1721:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %1728:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %1735:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3062.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub0:vreg_64_align2, %1679.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3046.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub0:vreg_64_align2, %1679.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3062.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub0:vreg_64_align2, %1683.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3046.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub0:vreg_64_align2, %1683.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3029.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub1:vreg_64_align2, %1679.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3013.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub1:vreg_64_align2, %1679.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3029.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub1:vreg_64_align2, %1683.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3013.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub1:vreg_64_align2, %1683.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3062:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3046:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3029:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %3013:vreg_64_align2, 0, 0, implicit $exec |
| %1766:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3343:vgpr_32, implicit $exec |
| %1767:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1766:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1768:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3340:vgpr_32, implicit $exec |
| %1769:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1768:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1770:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3341:vgpr_32, implicit $exec |
| %1771:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1770:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1772:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3342:vgpr_32, implicit $exec |
| %1773:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1772:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %1774:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %1781:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %1788:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %1795:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %1802:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %1809:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %1816:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %1823:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3185.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub0:vreg_64_align2, %1767.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3169.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub0:vreg_64_align2, %1767.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3185.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub0:vreg_64_align2, %1771.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3169.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub0:vreg_64_align2, %1771.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3153.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub1:vreg_64_align2, %1767.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3137.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub1:vreg_64_align2, %1767.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3153.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub1:vreg_64_align2, %1771.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3137.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub1:vreg_64_align2, %1771.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3185:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3169:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3153:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %3137:vreg_64_align2, 0, 0, implicit $exec |
| %1854:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1167:vgpr_32, implicit $mode, implicit $exec |
| %1855:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1179:vgpr_32, implicit $mode, implicit $exec |
| %1856:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1191:vgpr_32, implicit $mode, implicit $exec |
| %1857:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1203:vgpr_32, implicit $mode, implicit $exec |
| %1858:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1215:vgpr_32, implicit $mode, implicit $exec |
| %1859:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1227:vgpr_32, implicit $mode, implicit $exec |
| %1860:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1239:vgpr_32, implicit $mode, implicit $exec |
| %1861:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1251:vgpr_32, implicit $mode, implicit $exec |
| %1862:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1263:vgpr_32, implicit $mode, implicit $exec |
| %1863:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1275:vgpr_32, implicit $mode, implicit $exec |
| %1864:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1287:vgpr_32, implicit $mode, implicit $exec |
| %1865:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1299:vgpr_32, implicit $mode, implicit $exec |
| undef %3121.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1556:vgpr_32, 0, %1854:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3121.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1554:vgpr_32, 0, %1555:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3105.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1559:vgpr_32, 0, %1855:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3105.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1557:vgpr_32, 0, %1558:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3089.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1562:vgpr_32, 0, %1856:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3089.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1560:vgpr_32, 0, %1561:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3073.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1565:vgpr_32, 0, %1857:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3073.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1563:vgpr_32, 0, %1564:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1598.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1598.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1605.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1605.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1612.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1612.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1619.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1619.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1626.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1626.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1633.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1633.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1640.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1640.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1647.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1647.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| undef %2993.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1568:vgpr_32, 0, %1858:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2993.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1566:vgpr_32, 0, %1567:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3195.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1571:vgpr_32, 0, %1859:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3195.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1569:vgpr_32, 0, %1570:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3178.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1574:vgpr_32, 0, %1860:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3178.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1572:vgpr_32, 0, %1573:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3162.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1577:vgpr_32, 0, %1861:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3162.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1575:vgpr_32, 0, %1576:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1686.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1686.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1693.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1693.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1700.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1700.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1707.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1707.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1714.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1714.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1721.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1721.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1728.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1728.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1735.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1735.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| undef %3146.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1580:vgpr_32, 0, %1862:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3146.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1578:vgpr_32, 0, %1579:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3130.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1583:vgpr_32, 0, %1863:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3130.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1581:vgpr_32, 0, %1582:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3114.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1586:vgpr_32, 0, %1864:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3114.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1584:vgpr_32, 0, %1585:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3098.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1589:vgpr_32, 0, %1865:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3098.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1587:vgpr_32, 0, %1588:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1774.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1774.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1781.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1781.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1788.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1788.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1795.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1795.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1802.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1802.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1809.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1809.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1816.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1816.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1823.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1823.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2054:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1347:vgpr_32, implicit $mode, implicit $exec |
| %2055:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1341:vgpr_32, implicit $mode, implicit $exec |
| %2056:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1335:vgpr_32, implicit $mode, implicit $exec |
| %2057:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1329:vgpr_32, implicit $mode, implicit $exec |
| %2058:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1323:vgpr_32, implicit $mode, implicit $exec |
| %2059:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1317:vgpr_32, implicit $mode, implicit $exec |
| %2060:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1311:vgpr_32, implicit $mode, implicit $exec |
| %2061:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1305:vgpr_32, implicit $mode, implicit $exec |
| %2062:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1344:vgpr_32, implicit $mode, implicit $exec |
| %2063:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1338:vgpr_32, implicit $mode, implicit $exec |
| %2064:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1332:vgpr_32, implicit $mode, implicit $exec |
| %2065:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1326:vgpr_32, implicit $mode, implicit $exec |
| %2066:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1320:vgpr_32, implicit $mode, implicit $exec |
| %2067:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1314:vgpr_32, implicit $mode, implicit $exec |
| %2068:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1308:vgpr_32, implicit $mode, implicit $exec |
| %2069:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1302:vgpr_32, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3082.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2068:vgpr_32, 0, %2060:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3082.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2069:vgpr_32, 0, %2061:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3066.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2066:vgpr_32, 0, %2058:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3066.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2067:vgpr_32, 0, %2059:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3050.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2064:vgpr_32, 0, %2056:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3050.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2065:vgpr_32, 0, %2057:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3033.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2062:vgpr_32, 0, %2054:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3033.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2063:vgpr_32, 0, %2055:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2082:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2082.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2082.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2095:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2095.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2095.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2108:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2108.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2108.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2121:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2121.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2121.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2134:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2134.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2134.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2146:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2146.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2146.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2158:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2158.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2158.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2170:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2170.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2170.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %3345:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3345:vgpr_32, implicit $exec |
| %3344:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3344:vgpr_32, implicit $exec |
| %3343:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3343:vgpr_32, implicit $exec |
| %3342:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3342:vgpr_32, implicit $exec |
| %3341:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3341:vgpr_32, implicit $exec |
| %3340:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3340:vgpr_32, implicit $exec |
| %3339:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3339:vgpr_32, implicit $exec |
| %3338:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3338:vgpr_32, implicit $exec |
| %3337:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3337:vgpr_32, implicit $exec |
| %3336:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3336:vgpr_32, implicit $exec |
| %3335:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3335:vgpr_32, implicit $exec |
| %3334:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3334:vgpr_32, implicit $exec |
| %3333:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3333:vgpr_32, implicit $exec |
| %3332:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3332:vgpr_32, implicit $exec |
| %3331:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3331:vgpr_32, implicit $exec |
| %3330:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3330:vgpr_32, implicit $exec |
| %3329:vgpr_32 = nuw V_ADD_U32_e32 128, %3329:vgpr_32, implicit $exec |
| S_ENDPGM 0 |
| ... |