| # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| # Scheduling regression test. Both run lines below feed this MIR to llc, start the |
| # pipeline just before the machine-scheduler pass with -verify-misched enabled, and |
| # compare the emitted assembly against the autogenerated assertions that follow the |
| # kernel stub. The two invocations differ only in the target CPU (gfx942 versus |
| # gfx9-4-generic with AMDHSA code object version 6) and share a single check prefix, |
| # so both targets are expected to produce the same instruction sequence. |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s |
| |
| --- | |
| ; IR stub for the kernel under test: it only declares the symbol and returns. |
| ; NOTE(review): the machine-level body is presumably supplied later in this file; confirm. |
| ; The expected assembly below contains an "iglp_opt mask(0x00000002)" marker. |
| ; Do not hand-edit the assertions; regenerate them with utils/update_llc_test_checks.py. |
| define amdgpu_kernel void @largeInterleave() #0 { ret void } |
| ; GCN-LABEL: largeInterleave: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 |
| ; GCN-NEXT: ; implicit-def: $vgpr0 |
| ; GCN-NEXT: ; implicit-def: $vgpr2 |
| ; GCN-NEXT: ; implicit-def: $vgpr1 |
| ; GCN-NEXT: ; implicit-def: $vgpr8 |
| ; GCN-NEXT: ; implicit-def: $vgpr94 |
| ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79 |
| ; GCN-NEXT: ; implicit-def: $vgpr106 |
| ; GCN-NEXT: ; implicit-def: $vgpr132 |
| ; GCN-NEXT: ; implicit-def: $vgpr133 |
| ; GCN-NEXT: ; implicit-def: $vgpr139 |
| ; GCN-NEXT: ; implicit-def: $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127 |
| ; GCN-NEXT: ; iglp_opt mask(0x00000002) |
| ; GCN-NEXT: ; implicit-def: $sgpr0 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_readfirstlane_b32 s7, v0 |
| ; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11 |
| ; GCN-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10_sgpr11 |
| ; GCN-NEXT: ; implicit-def: $sgpr5 |
| ; GCN-NEXT: s_nop 1 |
| ; GCN-NEXT: v_lshl_add_u32 v0, s7, 4, v2 |
| ; GCN-NEXT: v_mul_lo_u32 v0, v0, s6 |
| ; GCN-NEXT: v_add_lshl_u32 v92, v0, v1, 1 |
| ; GCN-NEXT: v_add_u32_e32 v93, s0, v92 |
| ; GCN-NEXT: buffer_load_dwordx4 v[0:3], v92, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx4 v[4:7], v93, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: s_lshl_b32 s0, s7, 7 |
| ; GCN-NEXT: v_add_lshl_u32 v95, v8, s0, 1 |
| ; GCN-NEXT: v_add_u32_e32 v8, 64, v93 |
| ; GCN-NEXT: ; kill: killed $vgpr8 |
| ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-NEXT: ; kill: killed $vgpr92 |
| ; GCN-NEXT: ; implicit-def: $sgpr6 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b128 v95, v[0:3] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v95, v[4:7] offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx4 v[64:67], v92, s[8:11], 0 offen offset:64 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx4 v[68:71], v8, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ds_read_b128 v[72:75], v94 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[80:83], v94 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[84:87], v94 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], 0 |
| ; GCN-NEXT: ds_read_b128 v[88:91], v94 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[80:81], v[76:77], 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[76:77], 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[88:89], v[76:77], 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[82:83], v[78:79], v[32:47] |
| ; GCN-NEXT: ds_read_b128 v[80:83], v106 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[78:79], v[16:31] |
| ; GCN-NEXT: ds_read_b128 v[84:87], v106 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[90:91], v[78:79], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79 |
| ; GCN-NEXT: ds_read_b128 v[88:91], v106 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b128 v95, v[64:67] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63] |
| ; GCN-NEXT: v_add_u32_e32 v72, 0x80, v93 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v95, v[68:71] offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx4 v[64:67], v92, s[8:11], 0 offen offset:128 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx4 v[68:71], v72, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ; kill: killed $vgpr72 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v94 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[80:81], v[76:77], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[76:77], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[88:89], v[76:77], v[0:15] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[82:83], v[78:79], v[32:47] |
| ; GCN-NEXT: ds_read_b128 v[80:83], v94 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[78:79], v[16:31] |
| ; GCN-NEXT: ds_read_b128 v[84:87], v94 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[90:91], v[78:79], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79 |
| ; GCN-NEXT: ds_read_b128 v[88:91], v94 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[80:81], v[76:77], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[76:77], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[88:89], v[76:77], v[0:15] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[82:83], v[78:79], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[78:79], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[90:91], v[78:79], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[72:73], v[76:77], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[74:75], v[78:79], v[32:47] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[72:73], v[76:77], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[74:75], v[78:79], v[16:31] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b128 v95, v[64:67] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v95, v[68:71] offset:1024 |
| ; GCN-NEXT: ; implicit-def: $vgpr64 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[76:77], v[0:15] |
| ; GCN-NEXT: v_add_u32_e32 v72, 0xc0, v93 |
| ; GCN-NEXT: ; implicit-def: $vgpr73 |
| ; GCN-NEXT: v_add_u32_e32 v76, v132, v64 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx4 v[64:67], v92, s[8:11], 0 offen offset:192 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx4 v[68:71], v72, s[8:11], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ; kill: killed $vgpr72 |
| ; GCN-NEXT: v_add_u32_e32 v72, v132, v73 |
| ; GCN-NEXT: buffer_load_dwordx2 v[98:99], v76, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[102:103], v72, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[78:79], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $vgpr74 |
| ; GCN-NEXT: v_add_u32_e32 v72, v132, v74 |
| ; GCN-NEXT: ; implicit-def: $vgpr75 |
| ; GCN-NEXT: buffer_load_dwordx2 v[100:101], v72, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_add_u32_e32 v72, v132, v75 |
| ; GCN-NEXT: buffer_load_dwordx2 v[104:105], v72, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ds_read_b128 v[72:75], v94 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ; kill: killed $vgpr76 |
| ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79 |
| ; GCN-NEXT: ; implicit-def: $sgpr8 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v94 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[72:73], v[76:77], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[74:75], v[78:79], v[32:47] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v94 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[72:73], v[76:77], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[74:75], v[78:79], v[16:31] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v94 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[76:77], v[0:15] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[78:79], v[0:15] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[72:73], v[76:77], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[74:75], v[78:79], v[32:47] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[72:73], v[76:77], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[74:75], v[78:79], v[16:31] |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b128 v95, v[64:67] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b128 v95, v[68:71] offset:1024 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[64:67], v94 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[90:93], v94 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[76:77], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71 |
| ; GCN-NEXT: ds_read_b128 v[84:87], v94 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[64:65], v[68:69], v[48:63] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[78:79], v[0:15] |
| ; GCN-NEXT: ds_read_b128 v[76:79], v94 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[94:97], v106 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[66:67], v[70:71], v[48:63] |
| ; GCN-NEXT: ; implicit-def: $vgpr64_vgpr65_vgpr66_vgpr67 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[90:91], v[68:69], v[32:47] |
| ; GCN-NEXT: ds_read_b128 v[88:91], v106 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[80:83], v106 offset:1024 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1536 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[94:95], v[64:65], v[48:63] |
| ; GCN-NEXT: v_perm_b32 v94, v102, v98, s5 |
| ; GCN-NEXT: v_perm_b32 v98, v102, v98, s8 |
| ; GCN-NEXT: v_perm_b32 v102, v103, v99, s5 |
| ; GCN-NEXT: v_perm_b32 v95, v104, v100, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[92:93], v[70:71], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[68:69], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[96:97], v[66:67], v[48:63] |
| ; GCN-NEXT: v_perm_b32 v96, v103, v99, s8 |
| ; GCN-NEXT: v_perm_b32 v99, v104, v100, s8 |
| ; GCN-NEXT: v_perm_b32 v103, v105, v101, s5 |
| ; GCN-NEXT: v_perm_b32 v97, v105, v101, s8 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[88:89], v[64:65], v[32:47] |
| ; GCN-NEXT: s_nop 5 |
| ; GCN-NEXT: v_mul_f32_e32 v100, s4, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v101, s4, v49 |
| ; GCN-NEXT: v_max3_f32 v92, v100, s6, v101 |
| ; GCN-NEXT: v_mul_f32_e32 v93, s4, v50 |
| ; GCN-NEXT: v_mul_f32_e32 v100, s4, v51 |
| ; GCN-NEXT: v_max3_f32 v92, v92, v93, v100 |
| ; GCN-NEXT: v_mul_f32_e32 v93, s4, v52 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[70:71], v[16:31] |
| ; GCN-NEXT: v_mul_f32_e32 v100, s4, v53 |
| ; GCN-NEXT: v_max3_f32 v92, v92, v93, v100 |
| ; GCN-NEXT: v_mul_f32_e32 v84, s4, v54 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v55 |
| ; GCN-NEXT: v_max3_f32 v84, v92, v84, v85 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v56 |
| ; GCN-NEXT: v_mul_f32_e32 v92, s4, v57 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[76:77], v[68:69], v[0:15] |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v92 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v58 |
| ; GCN-NEXT: v_mul_f32_e32 v88, s4, v59 |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v60 |
| ; GCN-NEXT: v_mul_f32_e32 v88, s4, v61 |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[90:91], v[66:67], v[32:47] |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v62 |
| ; GCN-NEXT: v_mul_f32_e32 v88, s4, v63 |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88 |
| ; GCN-NEXT: ; implicit-def: $sgpr6 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[80:81], v[64:65], v[16:31] |
| ; GCN-NEXT: s_nop 6 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v32 |
| ; GCN-NEXT: v_mul_f32_e32 v88, s4, v33 |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v34 |
| ; GCN-NEXT: v_mul_f32_e32 v88, s4, v35 |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v36 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[78:79], v[70:71], v[0:15] |
| ; GCN-NEXT: v_mul_f32_e32 v86, s4, v37 |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v86 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v38 |
| ; GCN-NEXT: v_mul_f32_e32 v86, s4, v39 |
| ; GCN-NEXT: v_max3_f32 v84, v84, v85, v86 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v40 |
| ; GCN-NEXT: v_mul_f32_e32 v80, s4, v41 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[82:83], v[66:67], v[16:31] |
| ; GCN-NEXT: v_max3_f32 v80, v84, v85, v80 |
| ; GCN-NEXT: v_mul_f32_e32 v81, s4, v42 |
| ; GCN-NEXT: v_mul_f32_e32 v84, s4, v43 |
| ; GCN-NEXT: v_max3_f32 v80, v80, v81, v84 |
| ; GCN-NEXT: v_mul_f32_e32 v81, s4, v44 |
| ; GCN-NEXT: v_mul_f32_e32 v84, s4, v45 |
| ; GCN-NEXT: v_max3_f32 v80, v80, v81, v84 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[64:65], v[0:15] |
| ; GCN-NEXT: v_mul_f32_e32 v81, s4, v46 |
| ; GCN-NEXT: v_mul_f32_e32 v82, s4, v47 |
| ; GCN-NEXT: v_max3_f32 v80, v80, v81, v82 |
| ; GCN-NEXT: v_mul_f32_e32 v81, s4, v16 |
| ; GCN-NEXT: v_mul_f32_e32 v82, s4, v17 |
| ; GCN-NEXT: v_max3_f32 v80, v80, v81, v82 |
| ; GCN-NEXT: v_mul_f32_e32 v68, s4, v18 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[66:67], v[0:15] |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v19 |
| ; GCN-NEXT: v_max3_f32 v68, v80, v68, v69 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v20 |
| ; GCN-NEXT: v_mul_f32_e32 v76, s4, v21 |
| ; GCN-NEXT: v_max3_f32 v68, v68, v69, v76 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v22 |
| ; GCN-NEXT: v_mul_f32_e32 v70, s4, v23 |
| ; GCN-NEXT: v_max3_f32 v68, v68, v69, v70 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v24 |
| ; GCN-NEXT: v_mul_f32_e32 v70, s4, v25 |
| ; GCN-NEXT: v_max3_f32 v68, v68, v69, v70 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v26 |
| ; GCN-NEXT: v_mul_f32_e32 v70, s4, v27 |
| ; GCN-NEXT: v_max3_f32 v64, v68, v69, v70 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v28 |
| ; GCN-NEXT: v_mul_f32_e32 v68, s4, v29 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v68 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v30 |
| ; GCN-NEXT: v_mul_f32_e32 v68, s4, v31 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v68 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v0 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v1 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v2 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v3 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v4 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v5 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v6 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v7 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v8 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v9 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v10 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v11 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v12 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v13 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v14 |
| ; GCN-NEXT: v_mul_f32_e32 v66, s4, v15 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66 |
| ; GCN-NEXT: ; implicit-def: $vgpr65 |
| ; GCN-NEXT: ; implicit-def: $vgpr66 |
| ; GCN-NEXT: ; implicit-def: $vgpr68 |
| ; GCN-NEXT: ; implicit-def: $vgpr67 |
| ; GCN-NEXT: v_add_u32_e32 v65, s7, v65 |
| ; GCN-NEXT: v_and_b32_e32 v65, 0x1fffffff, v65 |
| ; GCN-NEXT: v_mul_lo_u32 v65, v65, s6 |
| ; GCN-NEXT: v_add_lshl_u32 v135, v66, v65, 1 |
| ; GCN-NEXT: ds_bpermute_b32 v65, v133, v64 |
| ; GCN-NEXT: ; implicit-def: $vgpr66 |
| ; GCN-NEXT: v_lshl_add_u32 v136, v66, 1, v135 |
| ; GCN-NEXT: ; implicit-def: $vgpr66 |
| ; GCN-NEXT: v_lshl_add_u32 v137, v66, 1, v136 |
| ; GCN-NEXT: ; implicit-def: $vgpr66 |
| ; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7 |
| ; GCN-NEXT: v_lshl_add_u32 v138, v66, 1, v137 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v135, v[94:95] |
| ; GCN-NEXT: v_max_f32_e32 v65, v65, v65 |
| ; GCN-NEXT: v_max_f32_e32 v64, v64, v65 |
| ; GCN-NEXT: ds_bpermute_b32 v65, v133, v64 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v136, v[98:99] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v137, v[102:103] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v138, v[96:97] |
| ; GCN-NEXT: v_add_u32_e32 v68, v132, v68 |
| ; GCN-NEXT: v_cndmask_b32_e64 v64, v65, v64, s[6:7] |
| ; GCN-NEXT: v_max_f32_e32 v64, v64, v64 |
| ; GCN-NEXT: ; implicit-def: $vgpr65 |
| ; GCN-NEXT: v_max_f32_e32 v66, v65, v65 |
| ; GCN-NEXT: v_max_f32_e32 v134, v66, v64 |
| ; GCN-NEXT: ; implicit-def: $vgpr64 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx2 v[156:157], v68, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_add_u32_e32 v64, v132, v64 |
| ; GCN-NEXT: buffer_load_dwordx2 v[158:159], v64, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ; implicit-def: $vgpr66 |
| ; GCN-NEXT: v_add_u32_e32 v64, v132, v66 |
| ; GCN-NEXT: buffer_load_dwordx2 v[128:129], v64, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_add_u32_e32 v64, v132, v67 |
| ; GCN-NEXT: buffer_load_dwordx2 v[130:131], v64, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v57, s4, v57, -v134 |
| ; GCN-NEXT: v_fma_f32 v48, s4, v48, -v134 |
| ; GCN-NEXT: v_fma_f32 v96, s4, v58, -v134 |
| ; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v57 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v48 |
| ; GCN-NEXT: v_fma_f32 v64, s4, v49, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v163, v57 |
| ; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v96 |
| ; GCN-NEXT: v_fma_f32 v66, s4, v50, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v164, v57 |
| ; GCN-NEXT: v_exp_f32_e32 v49, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v64 |
| ; GCN-NEXT: v_fma_f32 v67, s4, v51, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v50, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v66 |
| ; GCN-NEXT: v_fma_f32 v68, s4, v52, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v51, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v67 |
| ; GCN-NEXT: v_fma_f32 v69, s4, v53, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v52, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v68 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_fma_f32 v70, s4, v54, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v53, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v69 |
| ; GCN-NEXT: v_fma_f32 v71, s4, v55, -v134 |
| ; GCN-NEXT: ds_read_b128 v[140:143], v139 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v54, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v70 |
| ; GCN-NEXT: v_exp_f32_e32 v55, v48 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v71 |
| ; GCN-NEXT: ds_read_b128 v[144:147], v139 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v66, s4, v56, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v56, v48 |
| ; GCN-NEXT: v_sub_f32_e32 v48, v65, v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v64, v49 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v67, v50 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v68, v51 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v58, v52 |
| ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v48 |
| ; GCN-NEXT: ds_read_b128 v[148:151], v139 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v48, v48 |
| ; GCN-NEXT: v_pack_b32_f16 v161, v68, v58 |
| ; GCN-NEXT: v_pack_b32_f16 v160, v64, v67 |
| ; GCN-NEXT: v_mul_f32_e32 v58, 0x3fb8aa3b, v66 |
| ; GCN-NEXT: ; implicit-def: $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 |
| ; GCN-NEXT: ds_read_b128 v[152:155], v139 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v162, s4, v61, -v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v61, v55 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v57, v56 |
| ; GCN-NEXT: v_pk_mul_f32 v[64:65], v[64:65], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[66:67], v[66:67], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[68:69], v[68:69], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[70:71], v[70:71], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[72:73], v[72:73], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[74:75], v[74:75], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[76:77], v[76:77], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[78:79], v[78:79], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: ; implicit-def: $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95 |
| ; GCN-NEXT: v_fma_f32 v59, s4, v59, -v134 |
| ; GCN-NEXT: v_pk_mul_f32 v[80:81], v[80:81], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[140:141], v[160:161], v[64:79] |
| ; GCN-NEXT: v_pk_mul_f32 v[82:83], v[82:83], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[84:85], v[84:85], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[86:87], v[86:87], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[88:89], v[88:89], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[90:91], v[90:91], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[92:93], v[92:93], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[94:95], v[94:95], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: ; implicit-def: $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111 |
| ; GCN-NEXT: v_exp_f32_e32 v58, v58 |
| ; GCN-NEXT: v_pk_mul_f32 v[96:97], v[96:97], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[144:145], v[160:161], v[80:95] |
| ; GCN-NEXT: v_pk_mul_f32 v[98:99], v[98:99], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[100:101], v[100:101], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[102:103], v[102:103], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[104:105], v[104:105], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[106:107], v[106:107], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[108:109], v[108:109], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[110:111], v[110:111], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pack_b32_f16 v145, v61, v57 |
| ; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v59 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v140, v53 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v141, v54 |
| ; GCN-NEXT: v_exp_f32_e32 v59, v57 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[148:149], v[160:161], v[96:111] |
| ; GCN-NEXT: v_fma_f32 v60, s4, v60, -v134 |
| ; GCN-NEXT: v_pk_mul_f32 v[112:113], v[112:113], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[114:115], v[114:115], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[116:117], v[116:117], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[118:119], v[118:119], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[120:121], v[120:121], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[122:123], v[122:123], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[124:125], v[124:125], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[126:127], v[126:127], v[48:49] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_fma_f32 v148, s4, v62, -v134 |
| ; GCN-NEXT: v_pack_b32_f16 v144, v140, v141 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[152:153], v[160:161], v[112:127] |
| ; GCN-NEXT: v_fma_f32 v152, s4, v63, -v134 |
| ; GCN-NEXT: v_mul_f32_e32 v149, 0x3fb8aa3b, v60 |
| ; GCN-NEXT: ; implicit-def: $vgpr57 |
| ; GCN-NEXT: ds_read_b128 v[60:63], v57 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v160, v149 |
| ; GCN-NEXT: v_fma_f32 v161, s4, v33, -v134 |
| ; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v148 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v153, v58 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[142:143], v[144:145], v[64:79] |
| ; GCN-NEXT: v_fma_f32 v32, s4, v32, -v134 |
| ; GCN-NEXT: ds_read_b128 v[140:143], v57 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v40, s4, v40, -v134 |
| ; GCN-NEXT: v_fma_f32 v44, s4, v44, -v134 |
| ; GCN-NEXT: v_fma_f32 v16, s4, v16, -v134 |
| ; GCN-NEXT: v_fma_f32 v166, s4, v20, -v134 |
| ; GCN-NEXT: v_fma_f32 v24, s4, v24, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[146:147], v[144:145], v[80:95] |
| ; GCN-NEXT: v_mul_f32_e32 v146, 0x3fb8aa3b, v162 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v147, v163 |
| ; GCN-NEXT: v_exp_f32_e32 v162, v146 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v146, v164 |
| ; GCN-NEXT: v_fma_f32 v28, s4, v28, -v134 |
| ; GCN-NEXT: v_pack_b32_f16 v148, v153, v147 |
| ; GCN-NEXT: v_fma_f32 v0, s4, v0, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[150:151], v[144:145], v[96:111] |
| ; GCN-NEXT: v_exp_f32_e32 v151, v33 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v33, v59 |
| ; GCN-NEXT: v_fma_f32 v150, s4, v34, -v134 |
| ; GCN-NEXT: v_fma_f32 v8, s4, v8, -v134 |
| ; GCN-NEXT: v_fma_f32 v12, s4, v12, -v134 |
| ; GCN-NEXT: v_pack_b32_f16 v149, v146, v33 |
| ; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v152 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[154:155], v[144:145], v[112:127] |
| ; GCN-NEXT: v_fma_f32 v152, s4, v35, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v153, v33 |
| ; GCN-NEXT: v_fma_f32 v155, s4, v36, -v134 |
| ; GCN-NEXT: v_perm_b32 v36, v158, v156, s5 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v154, v160 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[60:61], v[148:149], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v60, 0x3fb8aa3b, v32 |
| ; GCN-NEXT: ds_read_b128 v[32:35], v57 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[144:147], v57 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v61, 0x3fb8aa3b, v161 |
| ; GCN-NEXT: v_exp_f32_e32 v165, v60 |
| ; GCN-NEXT: v_perm_b32 v60, v158, v156, s8 |
| ; GCN-NEXT: v_fma_f32 v158, s4, v37, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[140:141], v[148:149], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v161, v61 |
| ; GCN-NEXT: v_perm_b32 v140, v159, v157, s8 |
| ; GCN-NEXT: v_perm_b32 v37, v130, v128, s5 |
| ; GCN-NEXT: v_perm_b32 v61, v130, v128, s8 |
| ; GCN-NEXT: v_perm_b32 v141, v131, v129, s8 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b64 v135, v[36:37] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[32:33], v[148:149], v[96:111] |
| ; GCN-NEXT: v_perm_b32 v32, v159, v157, s5 |
| ; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v150 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v150, v151 |
| ; GCN-NEXT: v_fma_f32 v157, s4, v38, -v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v38, v153 |
| ; GCN-NEXT: v_exp_f32_e32 v159, v33 |
| ; GCN-NEXT: v_perm_b32 v33, v131, v129, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[144:145], v[148:149], v[112:127] |
| ; GCN-NEXT: v_pack_b32_f16 v129, v150, v38 |
| ; GCN-NEXT: v_mul_f32_e32 v38, 0x3fb8aa3b, v152 |
| ; GCN-NEXT: v_exp_f32_e32 v152, v38 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v136, v[60:61] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v137, v[32:33] |
| ; GCN-NEXT: ; implicit-def: $vgpr33 |
| ; GCN-NEXT: ; implicit-def: $vgpr38 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v138, v[140:141] |
| ; GCN-NEXT: v_add_u32_e32 v38, v132, v38 |
| ; GCN-NEXT: v_add_u32_e32 v33, v132, v33 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx2 v[130:131], v38, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[140:141], v33, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ; implicit-def: $vgpr36 |
| ; GCN-NEXT: v_add_u32_e32 v33, v132, v36 |
| ; GCN-NEXT: ; implicit-def: $vgpr37 |
| ; GCN-NEXT: buffer_load_dwordx2 v[144:145], v33, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_add_u32_e32 v33, v132, v37 |
| ; GCN-NEXT: buffer_load_dwordx2 v[148:149], v33, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v156, v162 |
| ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v155 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v33, v165 |
| ; GCN-NEXT: v_pack_b32_f16 v128, v154, v156 |
| ; GCN-NEXT: v_fma_f32 v150, s4, v39, -v134 |
| ; GCN-NEXT: ds_read_b128 v[36:39], v139 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[62:63], v[128:129], v[64:79] |
| ; GCN-NEXT: v_exp_f32_e32 v154, v32 |
| ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v158 |
| ; GCN-NEXT: ds_read_b128 v[60:63], v139 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v156, s4, v42, -v134 |
| ; GCN-NEXT: v_perm_b32 v20, v140, v130, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[142:143], v[128:129], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v155, v32 |
| ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v157 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v142, v161 |
| ; GCN-NEXT: v_fma_f32 v143, s4, v41, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[34:35], v[128:129], v[96:111] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v34, v159 |
| ; GCN-NEXT: v_exp_f32_e32 v157, v32 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v32, v152 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[146:147], v[128:129], v[112:127] |
| ; GCN-NEXT: v_pack_b32_f16 v129, v34, v32 |
| ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v150 |
| ; GCN-NEXT: v_pack_b32_f16 v128, v33, v142 |
| ; GCN-NEXT: v_exp_f32_e32 v146, v32 |
| ; GCN-NEXT: ds_read_b128 v[32:35], v139 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v142, s4, v43, -v134 |
| ; GCN-NEXT: v_fma_f32 v150, s4, v46, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[36:37], v[128:129], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v40 |
| ; GCN-NEXT: ds_read_b128 v[40:43], v139 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v147, v36 |
| ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v143 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v37, v154 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[60:61], v[128:129], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v143, v36 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v60, v155 |
| ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v142 |
| ; GCN-NEXT: v_fma_f32 v61, s4, v45, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[32:33], v[128:129], v[96:111] |
| ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v156 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v33, v157 |
| ; GCN-NEXT: v_exp_f32_e32 v156, v32 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v32, v146 |
| ; GCN-NEXT: v_pack_b32_f16 v33, v33, v32 |
| ; GCN-NEXT: v_pack_b32_f16 v32, v37, v60 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[40:41], v[128:129], v[112:127] |
| ; GCN-NEXT: v_exp_f32_e32 v129, v36 |
| ; GCN-NEXT: v_mul_f32_e32 v40, 0x3fb8aa3b, v44 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v60, v147 |
| ; GCN-NEXT: v_fma_f32 v128, s4, v47, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[38:39], v[32:33], v[64:79] |
| ; GCN-NEXT: ds_read_b128 v[36:39], v57 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v142, v40 |
| ; GCN-NEXT: v_mul_f32_e32 v40, 0x3fb8aa3b, v61 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v61, v143 |
| ; GCN-NEXT: ds_read_b128 v[44:47], v57 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[62:63], v[32:33], v[80:95] |
| ; GCN-NEXT: v_fma_f32 v62, s4, v17, -v134 |
| ; GCN-NEXT: v_mul_f32_e32 v17, 0x3fb8aa3b, v150 |
| ; GCN-NEXT: v_exp_f32_e32 v63, v40 |
| ; GCN-NEXT: v_pack_b32_f16 v40, v60, v61 |
| ; GCN-NEXT: v_fma_f32 v150, s4, v18, -v134 |
| ; GCN-NEXT: v_fma_f32 v60, s4, v19, -v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v61, v142 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[34:35], v[32:33], v[96:111] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v34, v156 |
| ; GCN-NEXT: v_exp_f32_e32 v158, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v129 |
| ; GCN-NEXT: v_pack_b32_f16 v41, v34, v17 |
| ; GCN-NEXT: v_mul_f32_e32 v17, 0x3fb8aa3b, v128 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[42:43], v[32:33], v[112:127] |
| ; GCN-NEXT: v_exp_f32_e32 v128, v17 |
| ; GCN-NEXT: v_perm_b32 v42, v141, v131, s8 |
| ; GCN-NEXT: v_perm_b32 v43, v149, v145, s8 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[36:37], v[40:41], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v16 |
| ; GCN-NEXT: ds_read_b128 v[16:19], v57 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[32:35], v57 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v37, 0x3fb8aa3b, v62 |
| ; GCN-NEXT: v_exp_f32_e32 v167, v36 |
| ; GCN-NEXT: v_perm_b32 v36, v140, v130, s8 |
| ; GCN-NEXT: v_fma_f32 v62, s4, v21, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[44:45], v[40:41], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v130, v37 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v45, v158 |
| ; GCN-NEXT: v_perm_b32 v21, v148, v144, s5 |
| ; GCN-NEXT: v_perm_b32 v37, v148, v144, s8 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v44, v63 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b64 v135, v[20:21] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[16:17], v[40:41], v[96:111] |
| ; GCN-NEXT: v_perm_b32 v16, v141, v131, s5 |
| ; GCN-NEXT: v_fma_f32 v131, s4, v22, -v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v22, v128 |
| ; GCN-NEXT: v_mul_f32_e32 v17, 0x3fb8aa3b, v150 |
| ; GCN-NEXT: v_exp_f32_e32 v140, v17 |
| ; GCN-NEXT: v_perm_b32 v17, v149, v145, s5 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v136, v[36:37] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[32:33], v[40:41], v[112:127] |
| ; GCN-NEXT: v_pack_b32_f16 v33, v45, v22 |
| ; GCN-NEXT: v_mul_f32_e32 v22, 0x3fb8aa3b, v60 |
| ; GCN-NEXT: v_exp_f32_e32 v144, v22 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v137, v[16:17] |
| ; GCN-NEXT: ; implicit-def: $vgpr17 |
| ; GCN-NEXT: ; implicit-def: $vgpr22 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v138, v[42:43] |
| ; GCN-NEXT: v_add_u32_e32 v22, v132, v22 |
| ; GCN-NEXT: v_add_u32_e32 v17, v132, v17 |
| ; GCN-NEXT: ; implicit-def: $vgpr20 |
| ; GCN-NEXT: ; implicit-def: $vgpr21 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx2 v[40:41], v22, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[42:43], v17, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_add_u32_e32 v20, v132, v20 |
| ; GCN-NEXT: v_add_u32_e32 v21, v132, v21 |
| ; GCN-NEXT: v_pack_b32_f16 v32, v61, v44 |
| ; GCN-NEXT: buffer_load_dwordx2 v[44:45], v20, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[60:61], v21, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v166 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[38:39], v[32:33], v[64:79] |
| ; GCN-NEXT: v_exp_f32_e32 v132, v16 |
| ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v62 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v167 |
| ; GCN-NEXT: v_fma_f32 v141, s4, v23, -v134 |
| ; GCN-NEXT: ds_read_b128 v[20:23], v139 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[36:39], v139 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[46:47], v[32:33], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v62, v16 |
| ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v46, v130 |
| ; GCN-NEXT: v_fma_f32 v47, s4, v25, -v134 |
| ; GCN-NEXT: v_fma_f32 v131, s4, v26, -v134 |
| ; GCN-NEXT: v_fma_f32 v149, s4, v4, -v134 |
| ; GCN-NEXT: ; implicit-def: $sgpr0 |
| ; GCN-NEXT: v_perm_b32 v4, v42, v40, s5 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[18:19], v[32:33], v[96:111] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v18, v140 |
| ; GCN-NEXT: v_exp_f32_e32 v145, v16 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v16, v144 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[34:35], v[32:33], v[112:127] |
| ; GCN-NEXT: v_pack_b32_f16 v33, v18, v16 |
| ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v141 |
| ; GCN-NEXT: v_pack_b32_f16 v32, v17, v46 |
| ; GCN-NEXT: v_exp_f32_e32 v35, v16 |
| ; GCN-NEXT: ds_read_b128 v[16:19], v139 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v34, s4, v27, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[20:21], v[32:33], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v24 |
| ; GCN-NEXT: ds_read_b128 v[24:27], v139 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v46, v20 |
| ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v47 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v21, v132 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[36:37], v[32:33], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v47, v20 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v36, v62 |
| ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v34 |
| ; GCN-NEXT: v_fma_f32 v37, s4, v29, -v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v34, v46 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[16:17], v[32:33], v[96:111] |
| ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v145 |
| ; GCN-NEXT: v_exp_f32_e32 v141, v16 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v16, v35 |
| ; GCN-NEXT: v_fma_f32 v131, s4, v30, -v134 |
| ; GCN-NEXT: v_pack_b32_f16 v17, v17, v16 |
| ; GCN-NEXT: v_pack_b32_f16 v16, v21, v36 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[24:25], v[32:33], v[112:127] |
| ; GCN-NEXT: v_exp_f32_e32 v33, v20 |
| ; GCN-NEXT: v_mul_f32_e32 v24, 0x3fb8aa3b, v28 |
| ; GCN-NEXT: v_fma_f32 v32, s4, v31, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[22:23], v[16:17], v[64:79] |
| ; GCN-NEXT: ds_read_b128 v[20:23], v57 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v36, v24 |
| ; GCN-NEXT: v_mul_f32_e32 v24, 0x3fb8aa3b, v37 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v37, v47 |
| ; GCN-NEXT: ds_read_b128 v[28:31], v57 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[38:39], v[16:17], v[80:95] |
| ; GCN-NEXT: v_fma_f32 v38, s4, v1, -v134 |
| ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_exp_f32_e32 v39, v24 |
| ; GCN-NEXT: v_pack_b32_f16 v24, v34, v37 |
| ; GCN-NEXT: v_fma_f32 v131, s4, v2, -v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v37, v36 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[18:19], v[16:17], v[96:111] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v18, v141 |
| ; GCN-NEXT: v_exp_f32_e32 v148, v1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v33 |
| ; GCN-NEXT: v_pack_b32_f16 v25, v18, v1 |
| ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v32 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[26:27], v[16:17], v[112:127] |
| ; GCN-NEXT: v_fma_f32 v32, s4, v3, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v34, v1 |
| ; GCN-NEXT: v_perm_b32 v26, v43, v41, s8 |
| ; GCN-NEXT: v_perm_b32 v27, v61, v45, s8 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[20:21], v[24:25], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v0 |
| ; GCN-NEXT: ds_read_b128 v[0:3], v57 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[16:19], v57 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v21, 0x3fb8aa3b, v38 |
| ; GCN-NEXT: v_exp_f32_e32 v150, v20 |
| ; GCN-NEXT: v_perm_b32 v20, v42, v40, s8 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v40, v148 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[28:29], v[24:25], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v38, v21 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v28, v39 |
| ; GCN-NEXT: v_fma_f32 v29, s4, v5, -v134 |
| ; GCN-NEXT: v_perm_b32 v5, v60, v44, s5 |
| ; GCN-NEXT: v_perm_b32 v21, v60, v44, s8 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b64 v135, v[4:5] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[0:1], v[24:25], v[96:111] |
| ; GCN-NEXT: v_perm_b32 v0, v43, v41, s5 |
| ; GCN-NEXT: v_fma_f32 v41, s4, v6, -v134 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v6, v34 |
| ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v131 |
| ; GCN-NEXT: v_exp_f32_e32 v42, v1 |
| ; GCN-NEXT: v_perm_b32 v1, v61, v45, s5 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v136, v[20:21] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v137, v[0:1] |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b64 v138, v[26:27] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[16:17], v[24:25], v[112:127] |
| ; GCN-NEXT: v_pack_b32_f16 v17, v40, v6 |
| ; GCN-NEXT: v_mul_f32_e32 v6, 0x3fb8aa3b, v32 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_pack_b32_f16 v16, v37, v28 |
| ; GCN-NEXT: v_fma_f32 v24, s4, v7, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v25, v6 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[4:7], v139 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[22:23], v[16:17], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v149 |
| ; GCN-NEXT: v_exp_f32_e32 v26, v0 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v29 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v150 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v27, v38 |
| ; GCN-NEXT: ds_read_b128 v[20:23], v139 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_fma_f32 v28, s4, v9, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[30:31], v[16:17], v[80:95] |
| ; GCN-NEXT: v_exp_f32_e32 v29, v0 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v41 |
| ; GCN-NEXT: v_fma_f32 v30, s4, v10, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[2:3], v[16:17], v[96:111] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v2, v42 |
| ; GCN-NEXT: v_exp_f32_e32 v31, v0 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v25 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[18:19], v[16:17], v[112:127] |
| ; GCN-NEXT: v_pack_b32_f16 v17, v2, v0 |
| ; GCN-NEXT: v_pack_b32_f16 v16, v1, v27 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v24 |
| ; GCN-NEXT: v_fma_f32 v18, s4, v11, -v134 |
| ; GCN-NEXT: v_exp_f32_e32 v19, v0 |
| ; GCN-NEXT: ds_read_b128 v[0:3], v139 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[4:5], v[16:17], v[64:79] |
| ; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v8 |
| ; GCN-NEXT: ds_read_b128 v[8:11], v139 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v24, v4 |
| ; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v28 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v5, v26 |
| ; GCN-NEXT: v_exp_f32_e32 v27, v4 |
| ; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v18 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[20:21], v[16:17], v[80:95] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v20, v29 |
| ; GCN-NEXT: v_fma_f32 v21, s4, v13, -v134 |
| ; GCN-NEXT: v_fma_f32 v28, s4, v14, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[0:1], v[16:17], v[96:111] |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v30 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v31 |
| ; GCN-NEXT: v_exp_f32_e32 v30, v0 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v19 |
| ; GCN-NEXT: v_pack_b32_f16 v1, v1, v0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[8:9], v[16:17], v[112:127] |
| ; GCN-NEXT: v_exp_f32_e32 v16, v4 |
| ; GCN-NEXT: v_pack_b32_f16 v0, v5, v20 |
| ; GCN-NEXT: v_mul_f32_e32 v9, 0x3fb8aa3b, v12 |
| ; GCN-NEXT: v_exp_f32_e32 v18, v9 |
| ; GCN-NEXT: v_mul_f32_e32 v9, 0x3fb8aa3b, v21 |
| ; GCN-NEXT: v_exp_f32_e32 v21, v9 |
| ; GCN-NEXT: v_fma_f32 v8, s4, v15, -v134 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[6:7], v[0:1], v[64:79] |
| ; GCN-NEXT: ds_read_b128 v[4:7], v57 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[12:15], v57 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v24 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v20, v27 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[22:23], v[0:1], v[80:95] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v22, v21 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v23, v18 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[2:3], v[0:1], v[96:111] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v3, v30 |
| ; GCN-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v28 |
| ; GCN-NEXT: v_exp_f32_e32 v2, v2 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[10:11], v[0:1], v[112:127] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v16 |
| ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v8 |
| ; GCN-NEXT: v_exp_f32_e32 v10, v1 |
| ; GCN-NEXT: v_pack_b32_f16 v8, v17, v20 |
| ; GCN-NEXT: v_pack_b32_f16 v9, v3, v0 |
| ; GCN-NEXT: v_add_f32_e32 v3, 0, v49 |
| ; GCN-NEXT: v_add_f32_e32 v3, v50, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v51, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v52, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v53, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v54, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v55, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v56, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v58, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v163, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v164, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v59, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v160, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v162, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v151, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v153, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v165, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v161, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v159, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v152, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v154, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v155, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v157, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v146, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v147, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v143, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v156, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v129, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v142, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v63, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v158, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v128, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v167, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v130, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v140, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v144, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v132, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v62, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v145, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v35, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v46, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v47, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v141, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v33, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v36, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v39, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v148, v3 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[12:13], v[8:9], v[80:95] |
| ; GCN-NEXT: v_add_f32_e32 v3, v34, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v150, v3 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v10 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v11, v2 |
| ; GCN-NEXT: v_add_f32_e32 v3, v38, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v42, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v25, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v26, v3 |
| ; GCN-NEXT: v_pack_b32_f16 v1, v11, v1 |
| ; GCN-NEXT: v_pack_b32_f16 v0, v23, v22 |
| ; GCN-NEXT: v_add_f32_e32 v3, v29, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v31, v3 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[14:15], v[0:1], v[80:95] |
| ; GCN-NEXT: v_add_f32_e32 v3, v19, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v24, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v27, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v30, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v16, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v18, v3 |
| ; GCN-NEXT: v_add_f32_e32 v3, v21, v3 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[4:5], v[8:9], v[64:79] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[6:7], v[0:1], v[64:79] |
| ; GCN-NEXT: v_add_f32_e32 v0, v2, v3 |
| ; GCN-NEXT: v_add_f32_e32 v4, v10, v0 |
| ; GCN-NEXT: ds_bpermute_b32 v5, v133, v4 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[0:3], v57 offset:1152 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_add_f32_e32 v2, v4, v5 |
| ; GCN-NEXT: ds_bpermute_b32 v3, v133, v2 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[0:1], v[8:9], v[96:111] |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[6:7] |
| ; GCN-NEXT: ; implicit-def: $vgpr4 |
| ; GCN-NEXT: v_fmac_f32_e32 v0, v4, v48 |
| ; GCN-NEXT: ds_read_b128 v[0:3], v57 offset:1728 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: s_endpgm |
| |
| ; Attribute group #0 is the group referenced by the define line of @largeInterleave above; it sets "amdgpu-flat-work-group-size" to "256,256". |
| attributes #0 = {"amdgpu-flat-work-group-size"="256,256"} |
| ; Metadata node !0 is the trailing operand of the INLINEASM s_waitcnt instructions in the MIR body below. |
| ; NOTE(review): presumably the inline-asm source-location cookie; the i64 payload's meaning is not shown in this file - confirm. |
| !0 = !{i64 2862105} |
| |
| ... |
| |
| --- |
| name: largeInterleave |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| stackPtrOffsetReg: '$sgpr32' |
| occupancy: 7 |
| body: | |
| bb.0: |
| liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2, $sgpr3, $sgpr4 |
| %11:vgpr_32 = IMPLICIT_DEF |
| %1:sgpr_512 = IMPLICIT_DEF |
| %16:vgpr_32 = IMPLICIT_DEF |
| %443:sgpr_128 = IMPLICIT_DEF |
| %18:sreg_32 = IMPLICIT_DEF |
| %25:vgpr_32 = IMPLICIT_DEF |
| %23:vgpr_32 = IMPLICIT_DEF |
| %391:vreg_128_align2 = IMPLICIT_DEF |
| %24:vgpr_32 = IMPLICIT_DEF |
| %392:vreg_128_align2 = IMPLICIT_DEF |
| %401:vreg_128_align2 = IMPLICIT_DEF |
| %406:vreg_128_align2 = IMPLICIT_DEF |
| %48:vgpr_32 = IMPLICIT_DEF |
| %473:sgpr_128 = IMPLICIT_DEF |
| %411:vreg_128_align2 = IMPLICIT_DEF |
| %416:vreg_128_align2 = IMPLICIT_DEF |
| %421:vreg_128_align2 = IMPLICIT_DEF |
| %426:vreg_128_align2 = IMPLICIT_DEF |
| %1114:sgpr_32 = IMPLICIT_DEF |
| %39:vgpr_32 = IMPLICIT_DEF |
| %484:sreg_64_xexec = IMPLICIT_DEF |
| %3346:vgpr_32 = IMPLICIT_DEF |
| %1422:sreg_32 = IMPLICIT_DEF |
| %1424:sreg_32 = IMPLICIT_DEF |
| %15:vgpr_32 = IMPLICIT_DEF |
| %494:sreg_32 = IMPLICIT_DEF |
| %47:vgpr_32 = IMPLICIT_DEF |
| %41:vgpr_32 = IMPLICIT_DEF |
| %42:vgpr_32 = IMPLICIT_DEF |
| %43:vgpr_32 = IMPLICIT_DEF |
| %44:vgpr_32 = IMPLICIT_DEF |
| %45:vgpr_32 = IMPLICIT_DEF |
| %50:sreg_32 = IMPLICIT_DEF |
| %3347:vgpr_32 = IMPLICIT_DEF |
| %3329:vgpr_32 = IMPLICIT_DEF |
| %3330:vgpr_32 = IMPLICIT_DEF |
| %3331:vgpr_32 = IMPLICIT_DEF |
| %3332:vgpr_32 = IMPLICIT_DEF |
| %3333:vgpr_32 = IMPLICIT_DEF |
| %2986:vreg_512_align2 = IMPLICIT_DEF |
| %3038:vreg_512_align2 = IMPLICIT_DEF |
| %2980:vreg_512_align2 = IMPLICIT_DEF |
| %3003:vreg_512_align2 = IMPLICIT_DEF |
| %3334:vgpr_32 = IMPLICIT_DEF |
| %3335:vgpr_32 = IMPLICIT_DEF |
| %3336:vgpr_32 = IMPLICIT_DEF |
| %3337:vgpr_32 = IMPLICIT_DEF |
| %3338:vgpr_32 = IMPLICIT_DEF |
| %3339:vgpr_32 = IMPLICIT_DEF |
| %3345:vgpr_32 = IMPLICIT_DEF |
| %3340:vgpr_32 = IMPLICIT_DEF |
| %3341:vgpr_32 = IMPLICIT_DEF |
| %3342:vgpr_32 = IMPLICIT_DEF |
| %3343:vgpr_32 = IMPLICIT_DEF |
| %3344:vgpr_32 = IMPLICIT_DEF |
| %84:vgpr_32 = COPY %3347 |
| %86:vgpr_32 = COPY %3347:vgpr_32 |
| IGLP_OPT 2 |
| %593:sreg_32 = V_READFIRSTLANE_B32 %11:vgpr_32, implicit $exec |
| %595:vgpr_32 = V_LSHL_ADD_U32_e64 %593:sreg_32, 4, %3329:vgpr_32, implicit $exec |
| %597:vgpr_32 = nsw V_MUL_LO_U32_e64 %595:vgpr_32, %1.sub6:sgpr_512, implicit $exec |
| %599:vgpr_32 = V_ADD_LSHL_U32_e64 %597:vgpr_32, %16:vgpr_32, 1, implicit $exec |
| %601:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %602:vgpr_32 = V_ADD_U32_e32 %18:sreg_32, %599:vgpr_32, implicit $exec |
| %603:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %602:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %605:sreg_32 = S_LSHL_B32 %593:sreg_32, 7, implicit-def dead $scc |
| %606:vgpr_32 = V_ADD_LSHL_U32_e64 %25:vgpr_32, %605:sreg_32, 1, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %601:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %603:vreg_128_align2, 1024, 0, implicit $exec |
| %608:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 64, 0, 0, implicit $exec |
| %610:vgpr_32 = V_ADD_U32_e32 64, %602:vgpr_32, implicit $exec |
| %611:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %610:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %612:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| early-clobber %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %612.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %612.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %626:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| early-clobber %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %626.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %626.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %638:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| early-clobber %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %638.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %638.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %650:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| early-clobber %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %650.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %650.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %662:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %662.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %662.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %673:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %673.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %673.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %684:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %684.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %684.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %695:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %695.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %695.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %608:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %611:vreg_128_align2, 1024, 0, implicit $exec |
| %706:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 128, 0, 0, implicit $exec |
| %708:vgpr_32 = V_ADD_U32_e32 128, %602:vgpr_32, implicit $exec |
| %709:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %708:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %710:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %710.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %710.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %721:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %721.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %721.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %732:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %732.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %732.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %743:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %743.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %743.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %754:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %754.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %754.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %765:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %765.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %765.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %776:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %776.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %776.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %787:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %787.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %787.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %706:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %709:vreg_128_align2, 1024, 0, implicit $exec |
| %798:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 192, 0, 0, implicit $exec |
| %800:vgpr_32 = V_ADD_U32_e32 192, %602:vgpr_32, implicit $exec |
| %801:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %800:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %802:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3330:vgpr_32, implicit $exec |
| %803:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %802:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %804:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3331:vgpr_32, implicit $exec |
| %805:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %804:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %806:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3332:vgpr_32, implicit $exec |
| %807:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %806:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %808:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3333:vgpr_32, implicit $exec |
| %809:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %808:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %810:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %810.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %810.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %821:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %821.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %821.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %832:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %832.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %832.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %843:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %843.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %843.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %854:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %854.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %854.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %865:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %865.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %865.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %876:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %876.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %876.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %887:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %887.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %887.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %798:vreg_128_align2, 0, 0, implicit $exec |
| DS_WRITE_B128_gfx9 %606:vgpr_32, %801:vreg_128_align2, 1024, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %898:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %898.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %898.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %909:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %909.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %909.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %920:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %920.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %920.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %931:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %931.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %931.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %942:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %942.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %942.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %969:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %969.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %969.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %996:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %996.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %996.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %1023:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1023.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1023.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %1050:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1051:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1052:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1053:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1054:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1055:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1056:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1057:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1058:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1059:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1060:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1061:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1062:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1063:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1064:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1065:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1066:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1067:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1068:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1069:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1070:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1071:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1072:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1073:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1074:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1075:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1076:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1077:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1078:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1079:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1080:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1081:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1082:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1083:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1084:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1085:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1086:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1087:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1088:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1089:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1090:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1091:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1092:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1093:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1094:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1095:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1096:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1097:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1098:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %1099:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %1100:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %1101:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %1102:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %1103:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %1104:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %1105:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %1106:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %1107:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %1108:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %1109:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %1110:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %1111:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %1112:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %1113:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %1115:vgpr_32 = V_MAX3_F32_e64 0, %1050:vgpr_32, 0, %1114:sgpr_32, 0, %1051:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1116:vgpr_32 = V_MAX3_F32_e64 0, %1115:vgpr_32, 0, %1052:vgpr_32, 0, %1053:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1117:vgpr_32 = V_MAX3_F32_e64 0, %1116:vgpr_32, 0, %1054:vgpr_32, 0, %1055:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1118:vgpr_32 = V_MAX3_F32_e64 0, %1117:vgpr_32, 0, %1056:vgpr_32, 0, %1057:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1119:vgpr_32 = V_MAX3_F32_e64 0, %1118:vgpr_32, 0, %1058:vgpr_32, 0, %1059:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1120:vgpr_32 = V_MAX3_F32_e64 0, %1119:vgpr_32, 0, %1060:vgpr_32, 0, %1061:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1121:vgpr_32 = V_MAX3_F32_e64 0, %1120:vgpr_32, 0, %1062:vgpr_32, 0, %1063:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1122:vgpr_32 = V_MAX3_F32_e64 0, %1121:vgpr_32, 0, %1064:vgpr_32, 0, %1065:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1123:vgpr_32 = V_MAX3_F32_e64 0, %1122:vgpr_32, 0, %1066:vgpr_32, 0, %1067:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1124:vgpr_32 = V_MAX3_F32_e64 0, %1123:vgpr_32, 0, %1068:vgpr_32, 0, %1069:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1125:vgpr_32 = V_MAX3_F32_e64 0, %1124:vgpr_32, 0, %1070:vgpr_32, 0, %1071:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1126:vgpr_32 = V_MAX3_F32_e64 0, %1125:vgpr_32, 0, %1072:vgpr_32, 0, %1073:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1127:vgpr_32 = V_MAX3_F32_e64 0, %1126:vgpr_32, 0, %1074:vgpr_32, 0, %1075:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1128:vgpr_32 = V_MAX3_F32_e64 0, %1127:vgpr_32, 0, %1076:vgpr_32, 0, %1077:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1129:vgpr_32 = V_MAX3_F32_e64 0, %1128:vgpr_32, 0, %1078:vgpr_32, 0, %1079:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1130:vgpr_32 = V_MAX3_F32_e64 0, %1129:vgpr_32, 0, %1080:vgpr_32, 0, %1081:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1131:vgpr_32 = V_MAX3_F32_e64 0, %1130:vgpr_32, 0, %1082:vgpr_32, 0, %1083:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1132:vgpr_32 = V_MAX3_F32_e64 0, %1131:vgpr_32, 0, %1084:vgpr_32, 0, %1085:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1133:vgpr_32 = V_MAX3_F32_e64 0, %1132:vgpr_32, 0, %1086:vgpr_32, 0, %1087:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1134:vgpr_32 = V_MAX3_F32_e64 0, %1133:vgpr_32, 0, %1088:vgpr_32, 0, %1089:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1135:vgpr_32 = V_MAX3_F32_e64 0, %1134:vgpr_32, 0, %1090:vgpr_32, 0, %1091:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1136:vgpr_32 = V_MAX3_F32_e64 0, %1135:vgpr_32, 0, %1092:vgpr_32, 0, %1093:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1137:vgpr_32 = V_MAX3_F32_e64 0, %1136:vgpr_32, 0, %1094:vgpr_32, 0, %1095:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1138:vgpr_32 = V_MAX3_F32_e64 0, %1137:vgpr_32, 0, %1096:vgpr_32, 0, %1097:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1139:vgpr_32 = V_MAX3_F32_e64 0, %1138:vgpr_32, 0, %1098:vgpr_32, 0, %1099:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1140:vgpr_32 = V_MAX3_F32_e64 0, %1139:vgpr_32, 0, %1100:vgpr_32, 0, %1101:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1141:vgpr_32 = V_MAX3_F32_e64 0, %1140:vgpr_32, 0, %1102:vgpr_32, 0, %1103:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1142:vgpr_32 = V_MAX3_F32_e64 0, %1141:vgpr_32, 0, %1104:vgpr_32, 0, %1105:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1143:vgpr_32 = V_MAX3_F32_e64 0, %1142:vgpr_32, 0, %1106:vgpr_32, 0, %1107:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1144:vgpr_32 = V_MAX3_F32_e64 0, %1143:vgpr_32, 0, %1108:vgpr_32, 0, %1109:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1145:vgpr_32 = V_MAX3_F32_e64 0, %1144:vgpr_32, 0, %1110:vgpr_32, 0, %1111:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1146:vgpr_32 = V_MAX3_F32_e64 0, %1145:vgpr_32, 0, %1112:vgpr_32, 0, %1113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1147:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1146:vgpr_32, 0, implicit $exec |
| %1148:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1147:vgpr_32, %1147:vgpr_32, implicit $mode, implicit $exec |
| %1149:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1146:vgpr_32, %1148:vgpr_32, implicit $mode, implicit $exec |
| %1150:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1149:vgpr_32, 0, implicit $exec |
| %1151:vgpr_32 = V_CNDMASK_B32_e64 0, %1150:vgpr_32, 0, %1149:vgpr_32, %484:sreg_64_xexec, implicit $exec |
| %1153:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1151:vgpr_32, %1151:vgpr_32, implicit $mode, implicit $exec |
| %1154:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %3346:vgpr_32, %3346:vgpr_32, implicit $mode, implicit $exec |
| %151:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1154:vgpr_32, %1153:vgpr_32, implicit $mode, implicit $exec |
| %1155:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1157:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1155:vgpr_32, implicit $mode, implicit $exec |
| %1158:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1157:vgpr_32, implicit $mode, implicit $exec |
| %1159:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1160:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1159:vgpr_32, implicit $mode, implicit $exec |
| %1161:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1160:vgpr_32, implicit $mode, implicit $exec |
| %1162:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1163:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1162:vgpr_32, implicit $mode, implicit $exec |
| %1164:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1163:vgpr_32, implicit $mode, implicit $exec |
| %1165:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1166:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1165:vgpr_32, implicit $mode, implicit $exec |
| %1167:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1166:vgpr_32, implicit $mode, implicit $exec |
| %1168:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1169:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1168:vgpr_32, implicit $mode, implicit $exec |
| %1170:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1169:vgpr_32, implicit $mode, implicit $exec |
| %1171:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1172:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1171:vgpr_32, implicit $mode, implicit $exec |
| %1173:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1172:vgpr_32, implicit $mode, implicit $exec |
| %1174:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1175:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1174:vgpr_32, implicit $mode, implicit $exec |
| %1176:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1175:vgpr_32, implicit $mode, implicit $exec |
| %1177:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1178:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1177:vgpr_32, implicit $mode, implicit $exec |
| %1179:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1178:vgpr_32, implicit $mode, implicit $exec |
| %1180:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1181:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1180:vgpr_32, implicit $mode, implicit $exec |
| %1182:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1181:vgpr_32, implicit $mode, implicit $exec |
| %1183:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1184:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1183:vgpr_32, implicit $mode, implicit $exec |
| %1185:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1184:vgpr_32, implicit $mode, implicit $exec |
| %1186:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1187:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1186:vgpr_32, implicit $mode, implicit $exec |
| %1188:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1187:vgpr_32, implicit $mode, implicit $exec |
| %1189:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1190:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1189:vgpr_32, implicit $mode, implicit $exec |
| %1191:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1190:vgpr_32, implicit $mode, implicit $exec |
| %1192:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1193:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1192:vgpr_32, implicit $mode, implicit $exec |
| %1194:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1193:vgpr_32, implicit $mode, implicit $exec |
| %1195:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1196:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1195:vgpr_32, implicit $mode, implicit $exec |
| %1197:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1196:vgpr_32, implicit $mode, implicit $exec |
| %1198:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1199:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1198:vgpr_32, implicit $mode, implicit $exec |
| %1200:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1199:vgpr_32, implicit $mode, implicit $exec |
| %1201:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1202:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1201:vgpr_32, implicit $mode, implicit $exec |
| %1203:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1202:vgpr_32, implicit $mode, implicit $exec |
| %1204:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1205:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1204:vgpr_32, implicit $mode, implicit $exec |
| %1206:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1205:vgpr_32, implicit $mode, implicit $exec |
| %1207:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1208:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1207:vgpr_32, implicit $mode, implicit $exec |
| %1209:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1208:vgpr_32, implicit $mode, implicit $exec |
| %1210:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1211:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1210:vgpr_32, implicit $mode, implicit $exec |
| %1212:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1211:vgpr_32, implicit $mode, implicit $exec |
| %1213:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1214:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1213:vgpr_32, implicit $mode, implicit $exec |
| %1215:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1214:vgpr_32, implicit $mode, implicit $exec |
| %1216:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1217:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1216:vgpr_32, implicit $mode, implicit $exec |
| %1218:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1217:vgpr_32, implicit $mode, implicit $exec |
| %1219:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1220:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1219:vgpr_32, implicit $mode, implicit $exec |
| %1221:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1220:vgpr_32, implicit $mode, implicit $exec |
| %1222:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1223:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1222:vgpr_32, implicit $mode, implicit $exec |
| %1224:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1223:vgpr_32, implicit $mode, implicit $exec |
| %1225:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1226:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1225:vgpr_32, implicit $mode, implicit $exec |
| %1227:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1226:vgpr_32, implicit $mode, implicit $exec |
| %1228:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1229:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1228:vgpr_32, implicit $mode, implicit $exec |
| %1230:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1229:vgpr_32, implicit $mode, implicit $exec |
| %1231:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1232:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1231:vgpr_32, implicit $mode, implicit $exec |
| %1233:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1232:vgpr_32, implicit $mode, implicit $exec |
| %1234:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1235:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1234:vgpr_32, implicit $mode, implicit $exec |
| %1236:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1235:vgpr_32, implicit $mode, implicit $exec |
| %1237:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1238:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1237:vgpr_32, implicit $mode, implicit $exec |
| %1239:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1238:vgpr_32, implicit $mode, implicit $exec |
| %1240:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1241:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1240:vgpr_32, implicit $mode, implicit $exec |
| %1242:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1241:vgpr_32, implicit $mode, implicit $exec |
| %1243:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1244:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1243:vgpr_32, implicit $mode, implicit $exec |
| %1245:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1244:vgpr_32, implicit $mode, implicit $exec |
| %1246:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1247:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1246:vgpr_32, implicit $mode, implicit $exec |
| %1248:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1247:vgpr_32, implicit $mode, implicit $exec |
| %1249:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1250:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1249:vgpr_32, implicit $mode, implicit $exec |
| %1251:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1250:vgpr_32, implicit $mode, implicit $exec |
| %1252:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1253:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1252:vgpr_32, implicit $mode, implicit $exec |
| %1254:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1253:vgpr_32, implicit $mode, implicit $exec |
| %1255:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1256:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1255:vgpr_32, implicit $mode, implicit $exec |
| %1257:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1256:vgpr_32, implicit $mode, implicit $exec |
| %1258:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1259:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1258:vgpr_32, implicit $mode, implicit $exec |
| %1260:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1259:vgpr_32, implicit $mode, implicit $exec |
| %1261:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1262:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1261:vgpr_32, implicit $mode, implicit $exec |
| %1263:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1262:vgpr_32, implicit $mode, implicit $exec |
| %1264:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1265:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1264:vgpr_32, implicit $mode, implicit $exec |
| %1266:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1265:vgpr_32, implicit $mode, implicit $exec |
| %1267:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1268:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1267:vgpr_32, implicit $mode, implicit $exec |
| %1269:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1268:vgpr_32, implicit $mode, implicit $exec |
| %1270:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1271:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1270:vgpr_32, implicit $mode, implicit $exec |
| %1272:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1271:vgpr_32, implicit $mode, implicit $exec |
| %1273:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1274:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1273:vgpr_32, implicit $mode, implicit $exec |
| %1275:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1274:vgpr_32, implicit $mode, implicit $exec |
| %1276:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1277:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1276:vgpr_32, implicit $mode, implicit $exec |
| %1278:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1277:vgpr_32, implicit $mode, implicit $exec |
| %1279:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1280:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1279:vgpr_32, implicit $mode, implicit $exec |
| %1281:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1280:vgpr_32, implicit $mode, implicit $exec |
| %1282:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1283:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1282:vgpr_32, implicit $mode, implicit $exec |
| %1284:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1283:vgpr_32, implicit $mode, implicit $exec |
| %1285:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1286:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1285:vgpr_32, implicit $mode, implicit $exec |
| %1287:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1286:vgpr_32, implicit $mode, implicit $exec |
| %1288:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1289:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1288:vgpr_32, implicit $mode, implicit $exec |
| %1290:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1289:vgpr_32, implicit $mode, implicit $exec |
| %1291:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1292:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1291:vgpr_32, implicit $mode, implicit $exec |
| %1293:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1292:vgpr_32, implicit $mode, implicit $exec |
| %1294:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1295:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1294:vgpr_32, implicit $mode, implicit $exec |
| %1296:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1295:vgpr_32, implicit $mode, implicit $exec |
| %1297:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1298:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1297:vgpr_32, implicit $mode, implicit $exec |
| %1299:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1298:vgpr_32, implicit $mode, implicit $exec |
| %1300:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1301:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1300:vgpr_32, implicit $mode, implicit $exec |
| %1302:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1301:vgpr_32, implicit $mode, implicit $exec |
| %1303:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1304:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1303:vgpr_32, implicit $mode, implicit $exec |
| %1305:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1304:vgpr_32, implicit $mode, implicit $exec |
| %1306:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1307:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1306:vgpr_32, implicit $mode, implicit $exec |
| %1308:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1307:vgpr_32, implicit $mode, implicit $exec |
| %1309:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1310:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1309:vgpr_32, implicit $mode, implicit $exec |
| %1311:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1310:vgpr_32, implicit $mode, implicit $exec |
| %1312:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1313:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1312:vgpr_32, implicit $mode, implicit $exec |
| %1314:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1313:vgpr_32, implicit $mode, implicit $exec |
| %1315:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1316:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1315:vgpr_32, implicit $mode, implicit $exec |
| %1317:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1316:vgpr_32, implicit $mode, implicit $exec |
| %1318:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1319:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1318:vgpr_32, implicit $mode, implicit $exec |
| %1320:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1319:vgpr_32, implicit $mode, implicit $exec |
| %1321:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1322:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1321:vgpr_32, implicit $mode, implicit $exec |
| %1323:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1322:vgpr_32, implicit $mode, implicit $exec |
| %1324:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1325:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1324:vgpr_32, implicit $mode, implicit $exec |
| %1326:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1325:vgpr_32, implicit $mode, implicit $exec |
| %1327:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1328:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1327:vgpr_32, implicit $mode, implicit $exec |
| %1329:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1328:vgpr_32, implicit $mode, implicit $exec |
| %1330:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1331:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1330:vgpr_32, implicit $mode, implicit $exec |
| %1332:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1331:vgpr_32, implicit $mode, implicit $exec |
| %1333:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1334:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1333:vgpr_32, implicit $mode, implicit $exec |
| %1335:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1334:vgpr_32, implicit $mode, implicit $exec |
| %1336:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1337:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1336:vgpr_32, implicit $mode, implicit $exec |
| %1338:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1337:vgpr_32, implicit $mode, implicit $exec |
| %1339:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1340:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1339:vgpr_32, implicit $mode, implicit $exec |
| %1341:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1340:vgpr_32, implicit $mode, implicit $exec |
| %1342:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1343:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1342:vgpr_32, implicit $mode, implicit $exec |
| %1344:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1343:vgpr_32, implicit $mode, implicit $exec |
| %1345:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %1346:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1345:vgpr_32, implicit $mode, implicit $exec |
| %1347:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1346:vgpr_32, implicit $mode, implicit $exec |
| %1348:vgpr_32 = contract nofpexcept V_ADD_F32_e32 0, %1158:vgpr_32, implicit $mode, implicit $exec |
| %1349:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1161:vgpr_32, %1348:vgpr_32, implicit $mode, implicit $exec |
| %1350:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1164:vgpr_32, %1349:vgpr_32, implicit $mode, implicit $exec |
| %1351:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1167:vgpr_32, %1350:vgpr_32, implicit $mode, implicit $exec |
| %1352:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1170:vgpr_32, %1351:vgpr_32, implicit $mode, implicit $exec |
| %1353:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1173:vgpr_32, %1352:vgpr_32, implicit $mode, implicit $exec |
| %1354:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1176:vgpr_32, %1353:vgpr_32, implicit $mode, implicit $exec |
| %1355:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1179:vgpr_32, %1354:vgpr_32, implicit $mode, implicit $exec |
| %1356:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1182:vgpr_32, %1355:vgpr_32, implicit $mode, implicit $exec |
| %1357:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1185:vgpr_32, %1356:vgpr_32, implicit $mode, implicit $exec |
| %1358:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1188:vgpr_32, %1357:vgpr_32, implicit $mode, implicit $exec |
| %1359:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1191:vgpr_32, %1358:vgpr_32, implicit $mode, implicit $exec |
| %1360:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1194:vgpr_32, %1359:vgpr_32, implicit $mode, implicit $exec |
| %1361:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1197:vgpr_32, %1360:vgpr_32, implicit $mode, implicit $exec |
| %1362:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1200:vgpr_32, %1361:vgpr_32, implicit $mode, implicit $exec |
| %1363:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1203:vgpr_32, %1362:vgpr_32, implicit $mode, implicit $exec |
| %1364:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1206:vgpr_32, %1363:vgpr_32, implicit $mode, implicit $exec |
| %1365:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1209:vgpr_32, %1364:vgpr_32, implicit $mode, implicit $exec |
| %1366:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1212:vgpr_32, %1365:vgpr_32, implicit $mode, implicit $exec |
| %1367:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1215:vgpr_32, %1366:vgpr_32, implicit $mode, implicit $exec |
| %1368:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1218:vgpr_32, %1367:vgpr_32, implicit $mode, implicit $exec |
| %1369:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1221:vgpr_32, %1368:vgpr_32, implicit $mode, implicit $exec |
| %1370:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1224:vgpr_32, %1369:vgpr_32, implicit $mode, implicit $exec |
| %1371:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1227:vgpr_32, %1370:vgpr_32, implicit $mode, implicit $exec |
| %1372:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1230:vgpr_32, %1371:vgpr_32, implicit $mode, implicit $exec |
| %1373:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1233:vgpr_32, %1372:vgpr_32, implicit $mode, implicit $exec |
| %1374:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1236:vgpr_32, %1373:vgpr_32, implicit $mode, implicit $exec |
| %1375:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1239:vgpr_32, %1374:vgpr_32, implicit $mode, implicit $exec |
| %1376:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1242:vgpr_32, %1375:vgpr_32, implicit $mode, implicit $exec |
| %1377:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1245:vgpr_32, %1376:vgpr_32, implicit $mode, implicit $exec |
| %1378:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1248:vgpr_32, %1377:vgpr_32, implicit $mode, implicit $exec |
| %1379:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1251:vgpr_32, %1378:vgpr_32, implicit $mode, implicit $exec |
| %1380:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1254:vgpr_32, %1379:vgpr_32, implicit $mode, implicit $exec |
| %1381:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1257:vgpr_32, %1380:vgpr_32, implicit $mode, implicit $exec |
| %1382:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1260:vgpr_32, %1381:vgpr_32, implicit $mode, implicit $exec |
| %1383:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1263:vgpr_32, %1382:vgpr_32, implicit $mode, implicit $exec |
| %1384:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1266:vgpr_32, %1383:vgpr_32, implicit $mode, implicit $exec |
| %1385:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1269:vgpr_32, %1384:vgpr_32, implicit $mode, implicit $exec |
| %1386:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1272:vgpr_32, %1385:vgpr_32, implicit $mode, implicit $exec |
| %1387:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1275:vgpr_32, %1386:vgpr_32, implicit $mode, implicit $exec |
| %1388:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1278:vgpr_32, %1387:vgpr_32, implicit $mode, implicit $exec |
| %1389:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1281:vgpr_32, %1388:vgpr_32, implicit $mode, implicit $exec |
| %1390:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1284:vgpr_32, %1389:vgpr_32, implicit $mode, implicit $exec |
| %1391:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1287:vgpr_32, %1390:vgpr_32, implicit $mode, implicit $exec |
| %1392:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1290:vgpr_32, %1391:vgpr_32, implicit $mode, implicit $exec |
| %1393:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1293:vgpr_32, %1392:vgpr_32, implicit $mode, implicit $exec |
| %1394:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1296:vgpr_32, %1393:vgpr_32, implicit $mode, implicit $exec |
| %1395:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1299:vgpr_32, %1394:vgpr_32, implicit $mode, implicit $exec |
| %1396:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1302:vgpr_32, %1395:vgpr_32, implicit $mode, implicit $exec |
| %1397:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1305:vgpr_32, %1396:vgpr_32, implicit $mode, implicit $exec |
| %1398:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1308:vgpr_32, %1397:vgpr_32, implicit $mode, implicit $exec |
| %1399:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1311:vgpr_32, %1398:vgpr_32, implicit $mode, implicit $exec |
| %1400:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1314:vgpr_32, %1399:vgpr_32, implicit $mode, implicit $exec |
| %1401:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1317:vgpr_32, %1400:vgpr_32, implicit $mode, implicit $exec |
| %1402:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1320:vgpr_32, %1401:vgpr_32, implicit $mode, implicit $exec |
| %1403:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1323:vgpr_32, %1402:vgpr_32, implicit $mode, implicit $exec |
| %1404:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1326:vgpr_32, %1403:vgpr_32, implicit $mode, implicit $exec |
| %1405:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1329:vgpr_32, %1404:vgpr_32, implicit $mode, implicit $exec |
| %1406:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1332:vgpr_32, %1405:vgpr_32, implicit $mode, implicit $exec |
| %1407:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1335:vgpr_32, %1406:vgpr_32, implicit $mode, implicit $exec |
| %1408:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1338:vgpr_32, %1407:vgpr_32, implicit $mode, implicit $exec |
| %1409:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1341:vgpr_32, %1408:vgpr_32, implicit $mode, implicit $exec |
| %1410:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1344:vgpr_32, %1409:vgpr_32, implicit $mode, implicit $exec |
| %1411:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1347:vgpr_32, %1410:vgpr_32, implicit $mode, implicit $exec |
| %1412:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1411:vgpr_32, 0, implicit $exec |
| %1413:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1411:vgpr_32, %1412:vgpr_32, implicit $mode, implicit $exec |
| %1414:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1413:vgpr_32, 0, implicit $exec |
| %3347:vgpr_32 = V_CNDMASK_B32_e64 0, %1414:vgpr_32, 0, %1413:vgpr_32, %484:sreg_64_xexec, implicit $exec |
| %1417:vgpr_32 = contract nofpexcept V_SUB_F32_e32 %3346:vgpr_32, %151:vgpr_32, implicit $mode, implicit $exec |
| %1418:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1417:vgpr_32, implicit $mode, implicit $exec |
| undef %1455.sub0:vreg_64_align2 = afn nofpexcept V_EXP_F32_e32 %1418:vgpr_32, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3037.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub0:vreg_64_align2, %803.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3021.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub0:vreg_64_align2, %803.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3037.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub0:vreg_64_align2, %807.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3021.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub0:vreg_64_align2, %807.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3005.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub1:vreg_64_align2, %803.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %2978.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub1:vreg_64_align2, %803.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3005.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub1:vreg_64_align2, %807.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %2978.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub1:vreg_64_align2, %807.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %1442:vgpr_32 = V_ADD_U32_e32 %593:sreg_32, %15:vgpr_32, implicit $exec |
| %1444:vgpr_32 = V_AND_B32_e32 536870911, %1442:vgpr_32, implicit $exec |
| %1446:vgpr_32 = nsw V_MUL_LO_U32_e64 %1444:vgpr_32, %494:sreg_32, implicit $exec |
| %1447:vgpr_32 = V_ADD_LSHL_U32_e64 %47:vgpr_32, %1446:vgpr_32, 1, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3037:vreg_64_align2, 0, 0, implicit $exec |
| %1449:vgpr_32 = V_LSHL_ADD_U32_e64 %41:vgpr_32, 1, %1447:vgpr_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3021:vreg_64_align2, 0, 0, implicit $exec |
| %1451:vgpr_32 = V_LSHL_ADD_U32_e64 %42:vgpr_32, 1, %1449:vgpr_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3005:vreg_64_align2, 0, 0, implicit $exec |
| %1453:vgpr_32 = V_LSHL_ADD_U32_e64 %43:vgpr_32, 1, %1451:vgpr_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %2978:vreg_64_align2, 0, 0, implicit $exec |
| %3347:vgpr_32 = contract nofpexcept V_FMAC_F32_e32 %86:vgpr_32, %1455.sub0:vreg_64_align2, %3347:vgpr_32, implicit $mode, implicit $exec |
| %2986.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2986.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3038.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %2980.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %3003.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %1554:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1158:vgpr_32, implicit $mode, implicit $exec |
| %1555:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1161:vgpr_32, implicit $mode, implicit $exec |
| %1556:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1164:vgpr_32, implicit $mode, implicit $exec |
| %1557:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1170:vgpr_32, implicit $mode, implicit $exec |
| %1558:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1173:vgpr_32, implicit $mode, implicit $exec |
| %1559:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1176:vgpr_32, implicit $mode, implicit $exec |
| %1560:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1182:vgpr_32, implicit $mode, implicit $exec |
| %1561:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1185:vgpr_32, implicit $mode, implicit $exec |
| %1562:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1188:vgpr_32, implicit $mode, implicit $exec |
| %1563:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1194:vgpr_32, implicit $mode, implicit $exec |
| %1564:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1197:vgpr_32, implicit $mode, implicit $exec |
| %1565:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1200:vgpr_32, implicit $mode, implicit $exec |
| %1566:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1206:vgpr_32, implicit $mode, implicit $exec |
| %1567:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1209:vgpr_32, implicit $mode, implicit $exec |
| %1568:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1212:vgpr_32, implicit $mode, implicit $exec |
| %1569:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1218:vgpr_32, implicit $mode, implicit $exec |
| %1570:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1221:vgpr_32, implicit $mode, implicit $exec |
| %1571:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1224:vgpr_32, implicit $mode, implicit $exec |
| %1572:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1230:vgpr_32, implicit $mode, implicit $exec |
| %1573:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1233:vgpr_32, implicit $mode, implicit $exec |
| %1574:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1236:vgpr_32, implicit $mode, implicit $exec |
| %1575:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1242:vgpr_32, implicit $mode, implicit $exec |
| %1576:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1245:vgpr_32, implicit $mode, implicit $exec |
| %1577:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1248:vgpr_32, implicit $mode, implicit $exec |
| %1578:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1254:vgpr_32, implicit $mode, implicit $exec |
| %1579:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1257:vgpr_32, implicit $mode, implicit $exec |
| %1580:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1260:vgpr_32, implicit $mode, implicit $exec |
| %1581:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1266:vgpr_32, implicit $mode, implicit $exec |
| %1582:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1269:vgpr_32, implicit $mode, implicit $exec |
| %1583:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1272:vgpr_32, implicit $mode, implicit $exec |
| %1584:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1278:vgpr_32, implicit $mode, implicit $exec |
| %1585:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1281:vgpr_32, implicit $mode, implicit $exec |
| %1586:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1284:vgpr_32, implicit $mode, implicit $exec |
| %1587:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1290:vgpr_32, implicit $mode, implicit $exec |
| %1588:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1293:vgpr_32, implicit $mode, implicit $exec |
| %1589:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1296:vgpr_32, implicit $mode, implicit $exec |
| %1590:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3345:vgpr_32, implicit $exec |
| %1591:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1590:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1592:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3334:vgpr_32, implicit $exec |
| %1593:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1592:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1594:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3335:vgpr_32, implicit $exec |
| %1595:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1594:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1596:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3336:vgpr_32, implicit $exec |
| %1597:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1596:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %1598:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %1605:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %1612:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %1619:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %1626:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %1633:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %1640:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %1647:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3161.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub0:vreg_64_align2, %1591.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3145.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub0:vreg_64_align2, %1591.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3161.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub0:vreg_64_align2, %1595.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3145.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub0:vreg_64_align2, %1595.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3129.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub1:vreg_64_align2, %1591.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3113.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub1:vreg_64_align2, %1591.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3129.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub1:vreg_64_align2, %1595.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3113.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub1:vreg_64_align2, %1595.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3161:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3145:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3129:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %3113:vreg_64_align2, 0, 0, implicit $exec |
| %1678:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3344:vgpr_32, implicit $exec |
| %1679:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1678:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1680:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3337:vgpr_32, implicit $exec |
| %1681:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1680:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1682:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3338:vgpr_32, implicit $exec |
| %1683:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1682:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1684:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3339:vgpr_32, implicit $exec |
| %1685:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1684:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %1686:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %1693:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %1700:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %1707:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %1714:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %1721:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %1728:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %1735:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3062.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub0:vreg_64_align2, %1679.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3046.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub0:vreg_64_align2, %1679.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3062.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub0:vreg_64_align2, %1683.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3046.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub0:vreg_64_align2, %1683.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3029.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub1:vreg_64_align2, %1679.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3013.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub1:vreg_64_align2, %1679.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3029.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub1:vreg_64_align2, %1683.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3013.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub1:vreg_64_align2, %1683.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3062:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3046:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3029:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %3013:vreg_64_align2, 0, 0, implicit $exec |
| %1766:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3343:vgpr_32, implicit $exec |
| %1767:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1766:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1768:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3340:vgpr_32, implicit $exec |
| %1769:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1768:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1770:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3341:vgpr_32, implicit $exec |
| %1771:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1770:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %1772:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3342:vgpr_32, implicit $exec |
| %1773:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1772:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %1774:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %1781:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %1788:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %1795:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %1802:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %1809:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %1816:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %1823:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3185.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub0:vreg_64_align2, %1767.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3169.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub0:vreg_64_align2, %1767.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3185.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub0:vreg_64_align2, %1771.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3169.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub0:vreg_64_align2, %1771.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec |
| undef %3153.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub1:vreg_64_align2, %1767.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| undef %3137.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub1:vreg_64_align2, %1767.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| %3153.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub1:vreg_64_align2, %1771.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec |
| %3137.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub1:vreg_64_align2, %1771.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec |
| DS_WRITE_B64_gfx9 %1447:vgpr_32, %3185:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1449:vgpr_32, %3169:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1451:vgpr_32, %3153:vreg_64_align2, 0, 0, implicit $exec |
| DS_WRITE_B64_gfx9 %1453:vgpr_32, %3137:vreg_64_align2, 0, 0, implicit $exec |
| %1854:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1167:vgpr_32, implicit $mode, implicit $exec |
| %1855:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1179:vgpr_32, implicit $mode, implicit $exec |
| %1856:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1191:vgpr_32, implicit $mode, implicit $exec |
| %1857:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1203:vgpr_32, implicit $mode, implicit $exec |
| %1858:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1215:vgpr_32, implicit $mode, implicit $exec |
| %1859:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1227:vgpr_32, implicit $mode, implicit $exec |
| %1860:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1239:vgpr_32, implicit $mode, implicit $exec |
| %1861:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1251:vgpr_32, implicit $mode, implicit $exec |
| %1862:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1263:vgpr_32, implicit $mode, implicit $exec |
| %1863:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1275:vgpr_32, implicit $mode, implicit $exec |
| %1864:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1287:vgpr_32, implicit $mode, implicit $exec |
| %1865:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1299:vgpr_32, implicit $mode, implicit $exec |
| undef %3121.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1556:vgpr_32, 0, %1854:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3121.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1554:vgpr_32, 0, %1555:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3105.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1559:vgpr_32, 0, %1855:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3105.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1557:vgpr_32, 0, %1558:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3089.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1562:vgpr_32, 0, %1856:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3089.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1560:vgpr_32, 0, %1561:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3073.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1565:vgpr_32, 0, %1857:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3073.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1563:vgpr_32, 0, %1564:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1598.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1598.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1605.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1605.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1612.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1612.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1619.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1619.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1626.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1626.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1633.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1633.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1640.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1640.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1647.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1647.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| undef %2993.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1568:vgpr_32, 0, %1858:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2993.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1566:vgpr_32, 0, %1567:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3195.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1571:vgpr_32, 0, %1859:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3195.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1569:vgpr_32, 0, %1570:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3178.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1574:vgpr_32, 0, %1860:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3178.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1572:vgpr_32, 0, %1573:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3162.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1577:vgpr_32, 0, %1861:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3162.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1575:vgpr_32, 0, %1576:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1686.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1686.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1693.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1693.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1700.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1700.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1707.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1707.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1714.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1714.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1721.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1721.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1728.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1728.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1735.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1735.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| undef %3146.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1580:vgpr_32, 0, %1862:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3146.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1578:vgpr_32, 0, %1579:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3130.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1583:vgpr_32, 0, %1863:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3130.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1581:vgpr_32, 0, %1582:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3114.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1586:vgpr_32, 0, %1864:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3114.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1584:vgpr_32, 0, %1585:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3098.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1589:vgpr_32, 0, %1865:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3098.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1587:vgpr_32, 0, %1588:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1774.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1774.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1781.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1781.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1788.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1788.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1795.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1795.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1802.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1802.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1809.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1809.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1816.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1816.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1823.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1823.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2054:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1347:vgpr_32, implicit $mode, implicit $exec |
| %2055:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1341:vgpr_32, implicit $mode, implicit $exec |
| %2056:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1335:vgpr_32, implicit $mode, implicit $exec |
| %2057:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1329:vgpr_32, implicit $mode, implicit $exec |
| %2058:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1323:vgpr_32, implicit $mode, implicit $exec |
| %2059:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1317:vgpr_32, implicit $mode, implicit $exec |
| %2060:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1311:vgpr_32, implicit $mode, implicit $exec |
| %2061:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1305:vgpr_32, implicit $mode, implicit $exec |
| %2062:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1344:vgpr_32, implicit $mode, implicit $exec |
| %2063:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1338:vgpr_32, implicit $mode, implicit $exec |
| %2064:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1332:vgpr_32, implicit $mode, implicit $exec |
| %2065:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1326:vgpr_32, implicit $mode, implicit $exec |
| %2066:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1320:vgpr_32, implicit $mode, implicit $exec |
| %2067:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1314:vgpr_32, implicit $mode, implicit $exec |
| %2068:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1308:vgpr_32, implicit $mode, implicit $exec |
| %2069:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1302:vgpr_32, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %3082.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2068:vgpr_32, 0, %2060:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3082.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2069:vgpr_32, 0, %2061:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3066.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2066:vgpr_32, 0, %2058:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3066.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2067:vgpr_32, 0, %2059:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3050.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2064:vgpr_32, 0, %2056:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3050.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2065:vgpr_32, 0, %2057:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %3033.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2062:vgpr_32, 0, %2054:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %3033.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2063:vgpr_32, 0, %2055:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %2082:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2082.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2082.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2095:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2095.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2095.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2108:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2108.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2108.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2121:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2121.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2121.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2134:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2134.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2134.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2146:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2146.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2146.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2158:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2158.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2158.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %2170:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2170.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2170.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %3345:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3345:vgpr_32, implicit $exec |
| %3344:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3344:vgpr_32, implicit $exec |
| %3343:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3343:vgpr_32, implicit $exec |
| %3342:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3342:vgpr_32, implicit $exec |
| %3341:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3341:vgpr_32, implicit $exec |
| %3340:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3340:vgpr_32, implicit $exec |
| %3339:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3339:vgpr_32, implicit $exec |
| %3338:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3338:vgpr_32, implicit $exec |
| %3337:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3337:vgpr_32, implicit $exec |
| %3336:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3336:vgpr_32, implicit $exec |
| %3335:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3335:vgpr_32, implicit $exec |
| %3334:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3334:vgpr_32, implicit $exec |
| %3333:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3333:vgpr_32, implicit $exec |
| %3332:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3332:vgpr_32, implicit $exec |
| %3331:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3331:vgpr_32, implicit $exec |
| %3330:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3330:vgpr_32, implicit $exec |
| %3329:vgpr_32 = nuw V_ADD_U32_e32 128, %3329:vgpr_32, implicit $exec |
| S_ENDPGM 0 |
| ... |