| # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s |
| |
| --- | |
; Empty IR stub backing the MIR function named smallInterleave in the second
; YAML document of this file; it only carries attribute group #0. The
; autogenerated FileCheck lines that follow describe the expected llc output
; for this kernel and should be regenerated with the update script rather
; than edited by hand.
| define amdgpu_kernel void @smallInterleave() #0 { ret void } |
| ; GCN-LABEL: smallInterleave: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: ; implicit-def: $vgpr2 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_readfirstlane_b32 s20, v2 |
| ; GCN-NEXT: ; implicit-def: $sgpr4 |
| ; GCN-NEXT: ; implicit-def: $vgpr3 |
| ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 |
| ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-NEXT: ; implicit-def: $vgpr50 |
| ; GCN-NEXT: ; implicit-def: $sgpr16_sgpr17_sgpr18_sgpr19 |
| ; GCN-NEXT: ; implicit-def: $vgpr49 |
| ; GCN-NEXT: ; implicit-def: $vgpr40_vgpr41_vgpr42_vgpr43 |
| ; GCN-NEXT: ; implicit-def: $vgpr51 |
| ; GCN-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65 |
| ; GCN-NEXT: ; implicit-def: $vgpr76 |
| ; GCN-NEXT: ; implicit-def: $vgpr77 |
| ; GCN-NEXT: ; implicit-def: $vgpr78 |
| ; GCN-NEXT: ; implicit-def: $vgpr79 |
| ; GCN-NEXT: ; implicit-def: $vgpr80 |
| ; GCN-NEXT: ; implicit-def: $vgpr91 |
| ; GCN-NEXT: ; kill: killed $sgpr16_sgpr17_sgpr18_sgpr19 |
| ; GCN-NEXT: ; iglp_opt mask(0x00000002) |
| ; GCN-NEXT: s_nop 1 |
| ; GCN-NEXT: v_lshl_add_u32 v2, s20, 4, v3 |
| ; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s4, v2, v[0:1] |
| ; GCN-NEXT: buffer_load_dwordx4 v[0:3], v4, s[0:3], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: s_lshl_b32 s4, s20, 7 |
| ; GCN-NEXT: ; implicit-def: $vgpr5 |
| ; GCN-NEXT: v_add_lshl_u32 v48, v5, s4, 1 |
| ; GCN-NEXT: v_add_u32_e32 v76, s20, v76 |
| ; GCN-NEXT: v_and_b32_e32 v76, 0x1fffffff, v76 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b128 v48, v[0:3] |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx4 v[32:35], v4, s[0:3], 0 offen offset:64 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ; implicit-def: $vgpr0 |
| ; GCN-NEXT: ; implicit-def: $vgpr1 |
| ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 |
| ; GCN-NEXT: ; implicit-def: $sgpr6 |
| ; GCN-NEXT: v_add_u32_e32 v0, v0, v50 |
| ; GCN-NEXT: v_add_u32_e32 v1, v1, v50 |
| ; GCN-NEXT: buffer_load_dwordx2 v[72:73], v0, s[16:19], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[74:75], v1, s[16:19], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: ds_read_b128 v[36:39], v49 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[44:47], v49 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[36:37], v[40:41], 0 |
| ; GCN-NEXT: ; kill: killed $vgpr1 |
| ; GCN-NEXT: ; kill: killed $vgpr0 |
| ; GCN-NEXT: v_mul_lo_u32 v76, v76, s6 |
| ; GCN-NEXT: v_add_lshl_u32 v76, v77, v76, 1 |
| ; GCN-NEXT: v_lshl_add_u32 v77, v78, 1, v76 |
| ; GCN-NEXT: ; implicit-def: $sgpr5 |
| ; GCN-NEXT: v_lshl_add_u32 v78, v79, 1, v77 |
| ; GCN-NEXT: ; implicit-def: $sgpr2 |
| ; GCN-NEXT: ; implicit-def: $sgpr3 |
| ; GCN-NEXT: v_lshl_add_u32 v79, v80, 1, v78 |
| ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[44:45], v[40:41], 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[38:39], v[42:43], v[16:31] |
| ; GCN-NEXT: ds_read_b128 v[36:39], v51 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[46:47], v[42:43], v[0:15] |
| ; GCN-NEXT: ds_read_b128 v[44:47], v51 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ; implicit-def: $vgpr40_vgpr41_vgpr42_vgpr43 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b128 v48, v[32:35] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[36:37], v[40:41], v[16:31] |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[32:35], v49 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[44:45], v[40:41], v[0:15] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[38:39], v[42:43], v[16:31] |
| ; GCN-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[46:47], v[42:43], v[0:15] |
| ; GCN-NEXT: ds_read_b128 v[40:43], v49 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[68:71], v51 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[32:33], v[36:37], v[16:31] |
| ; GCN-NEXT: ; implicit-def: $vgpr32 |
| ; GCN-NEXT: ; implicit-def: $vgpr33 |
| ; GCN-NEXT: v_add_u32_e32 v82, v32, v50 |
| ; GCN-NEXT: v_add_u32_e32 v83, v33, v50 |
| ; GCN-NEXT: ; kill: killed $vgpr82 |
| ; GCN-NEXT: ; kill: killed $vgpr83 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[34:35], v[38:39], v[16:31] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[40:41], v[36:37], v[0:15] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[68:69], v[62:63], v[16:31] |
| ; GCN-NEXT: ds_read_b128 v[66:69], v51 offset:512 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[42:43], v[38:39], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[66:67], v[62:63], v[0:15] |
| ; GCN-NEXT: ; implicit-def: $vgpr66 |
| ; GCN-NEXT: ; implicit-def: $vgpr67 |
| ; GCN-NEXT: v_max_f32_e32 v81, v67, v67 |
| ; GCN-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[70:71], v[64:65], v[16:31] |
| ; GCN-NEXT: v_perm_b32 v70, v74, v72, s2 |
| ; GCN-NEXT: v_perm_b32 v71, v74, v72, s3 |
| ; GCN-NEXT: v_perm_b32 v72, v75, v73, s2 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b32 v76, v70 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b32 v77, v71 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b32 v78, v72 |
| ; GCN-NEXT: v_mul_f32_e32 v74, s4, v20 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[68:69], v[64:65], v[0:15] |
| ; GCN-NEXT: v_mul_f32_e32 v64, s4, v16 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v17 |
| ; GCN-NEXT: v_mul_f32_e32 v68, s4, v18 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v19 |
| ; GCN-NEXT: v_max3_f32 v64, v64, s5, v65 |
| ; GCN-NEXT: v_mul_f32_e32 v80, s4, v21 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v68, v69 |
| ; GCN-NEXT: v_mul_f32_e32 v84, s4, v22 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v23 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v74, v80 |
| ; GCN-NEXT: v_mul_f32_e32 v86, s4, v24 |
| ; GCN-NEXT: v_mul_f32_e32 v87, s4, v25 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v84, v85 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v26 |
| ; GCN-NEXT: v_mul_f32_e32 v68, s4, v27 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v86, v87 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v28 |
| ; GCN-NEXT: v_mul_f32_e32 v74, s4, v29 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v65, v68 |
| ; GCN-NEXT: v_mul_f32_e32 v80, s4, v30 |
| ; GCN-NEXT: v_mul_f32_e32 v84, s4, v31 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v69, v74 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v0 |
| ; GCN-NEXT: v_mul_f32_e32 v86, s4, v1 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v80, v84 |
| ; GCN-NEXT: v_mul_f32_e32 v87, s4, v2 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v3 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v85, v86 |
| ; GCN-NEXT: v_mul_f32_e32 v68, s4, v4 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v5 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v87, v65 |
| ; GCN-NEXT: v_mul_f32_e32 v74, s4, v6 |
| ; GCN-NEXT: v_mul_f32_e32 v80, s4, v7 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v68, v69 |
| ; GCN-NEXT: v_mul_f32_e32 v84, s4, v8 |
| ; GCN-NEXT: v_mul_f32_e32 v85, s4, v9 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v74, v80 |
| ; GCN-NEXT: v_mul_f32_e32 v86, s4, v10 |
| ; GCN-NEXT: v_mul_f32_e32 v65, s4, v11 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v84, v85 |
| ; GCN-NEXT: v_mul_f32_e32 v87, s4, v12 |
| ; GCN-NEXT: v_mul_f32_e32 v68, s4, v13 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v86, v65 |
| ; GCN-NEXT: v_mul_f32_e32 v69, s4, v14 |
| ; GCN-NEXT: v_mul_f32_e32 v74, s4, v15 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v87, v68 |
| ; GCN-NEXT: v_max3_f32 v64, v64, v69, v74 |
| ; GCN-NEXT: ds_bpermute_b32 v65, v66, v64 |
| ; GCN-NEXT: v_perm_b32 v68, v75, v73, s3 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b32 v79, v68 |
| ; GCN-NEXT: ; implicit-def: $vgpr84 |
| ; GCN-NEXT: v_max_f32_e32 v65, v65, v65 |
| ; GCN-NEXT: v_max_f32_e32 v70, v64, v65 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dwordx2 v[64:65], v82, s[16:19], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: buffer_load_dwordx2 v[68:69], v83, s[16:19], 0 offen sc0 sc1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_bpermute_b32 v71, v66, v70 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: v_cndmask_b32_e64 v70, v71, v70, s[0:1] |
| ; GCN-NEXT: v_max_f32_e32 v70, v70, v70 |
| ; GCN-NEXT: v_max_f32_e32 v72, v81, v70 |
| ; GCN-NEXT: v_fma_f32 v16, s4, v16, -v72 |
| ; GCN-NEXT: v_fma_f32 v18, s4, v18, -v72 |
| ; GCN-NEXT: v_fma_f32 v19, s4, v19, -v72 |
| ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v16 |
| ; GCN-NEXT: v_mul_f32_e32 v18, 0x3fb8aa3b, v18 |
| ; GCN-NEXT: v_mul_f32_e32 v19, 0x3fb8aa3b, v19 |
| ; GCN-NEXT: v_fma_f32 v17, s4, v17, -v72 |
| ; GCN-NEXT: v_fma_f32 v20, s4, v20, -v72 |
| ; GCN-NEXT: v_fma_f32 v21, s4, v21, -v72 |
| ; GCN-NEXT: v_fma_f32 v22, s4, v22, -v72 |
| ; GCN-NEXT: v_fma_f32 v23, s4, v23, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v73, v16 |
| ; GCN-NEXT: v_exp_f32_e32 v74, v18 |
| ; GCN-NEXT: v_exp_f32_e32 v75, v19 |
| ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v20 |
| ; GCN-NEXT: v_mul_f32_e32 v21, 0x3fb8aa3b, v21 |
| ; GCN-NEXT: v_mul_f32_e32 v22, 0x3fb8aa3b, v22 |
| ; GCN-NEXT: v_exp_f32_e32 v80, v20 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v16, v73 |
| ; GCN-NEXT: v_fma_f32 v18, s4, v24, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v81, v21 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v21, v74 |
| ; GCN-NEXT: v_fma_f32 v20, s4, v25, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v82, v22 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v22, v75 |
| ; GCN-NEXT: v_mul_f32_e32 v17, 0x3fb8aa3b, v17 |
| ; GCN-NEXT: v_mul_f32_e32 v23, 0x3fb8aa3b, v23 |
| ; GCN-NEXT: v_fma_f32 v26, s4, v26, -v72 |
| ; GCN-NEXT: v_pack_b32_f16 v71, v21, v22 |
| ; GCN-NEXT: v_mul_f32_e32 v22, 0x3fb8aa3b, v18 |
| ; GCN-NEXT: v_sub_f32_e32 v24, v67, v72 |
| ; GCN-NEXT: v_exp_f32_e32 v83, v23 |
| ; GCN-NEXT: v_fma_f32 v67, s4, v27, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v85, v22 |
| ; GCN-NEXT: v_exp_f32_e32 v17, v17 |
| ; GCN-NEXT: v_mul_f32_e32 v24, 0x3fb8aa3b, v24 |
| ; GCN-NEXT: v_mul_f32_e32 v23, 0x3fb8aa3b, v20 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v19, v17 |
| ; GCN-NEXT: v_fma_f32 v87, s4, v29, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v88, v23 |
| ; GCN-NEXT: v_fma_f32 v0, s4, v0, -v72 |
| ; GCN-NEXT: v_pack_b32_f16 v70, v16, v19 |
| ; GCN-NEXT: ds_read_b128 v[18:21], v84 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_exp_f32_e32 v16, v24 |
| ; GCN-NEXT: ds_read_b128 v[22:25], v84 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_pk_mul_f32 v[48:49], v[48:49], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[50:51], v[50:51], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[52:53], v[52:53], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[54:55], v[54:55], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[56:57], v[56:57], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[58:59], v[58:59], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[60:61], v[60:61], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[62:63], v[62:63], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[32:33], v[32:33], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[34:35], v[34:35], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[36:37], v[36:37], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[38:39], v[38:39], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[40:41], v[40:41], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[42:43], v[42:43], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[44:45], v[44:45], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_pk_mul_f32 v[46:47], v[46:47], v[16:17] op_sel_hi:[1,0] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[18:19], v[70:71], v[48:63] |
| ; GCN-NEXT: v_add_f32_e32 v18, 0, v73 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v89, v83 |
| ; GCN-NEXT: v_fma_f32 v73, s4, v28, -v72 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v19, v80 |
| ; GCN-NEXT: v_fma_f32 v1, s4, v1, -v72 |
| ; GCN-NEXT: v_perm_b32 v90, v69, v65, s2 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[22:23], v[70:71], v[32:47] |
| ; GCN-NEXT: v_add_f32_e32 v17, v17, v18 |
| ; GCN-NEXT: v_mul_f32_e32 v18, 0x3fb8aa3b, v26 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v86, v81 |
| ; GCN-NEXT: v_fma_f32 v23, s4, v30, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v30, v18 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v22, v82 |
| ; GCN-NEXT: v_fma_f32 v18, s4, v31, -v72 |
| ; GCN-NEXT: v_perm_b32 v31, v68, v64, s2 |
| ; GCN-NEXT: v_perm_b32 v64, v68, v64, s3 |
| ; GCN-NEXT: v_perm_b32 v65, v69, v65, s3 |
| ; GCN-NEXT: ds_read_b128 v[26:29], v91 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ds_read_b128 v[68:71], v91 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: ds_write_b32 v76, v31 |
| ; GCN-NEXT: v_mul_f32_e32 v31, 0x3fb8aa3b, v67 |
| ; GCN-NEXT: v_exp_f32_e32 v31, v31 |
| ; GCN-NEXT: v_mul_f32_e32 v67, 0x3fb8aa3b, v18 |
| ; GCN-NEXT: v_pack_b32_f16 v18, v19, v86 |
| ; GCN-NEXT: v_pack_b32_f16 v19, v22, v89 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b32 v77, v64 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b32 v78, v90 |
| ; GCN-NEXT: buffer_wbl2 sc0 sc1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_write_b32 v79, v65 |
| ; GCN-NEXT: v_mul_f32_e32 v64, 0x3fb8aa3b, v73 |
| ; GCN-NEXT: v_mul_f32_e32 v65, 0x3fb8aa3b, v87 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[20:21], v[18:19], v[48:63] |
| ; GCN-NEXT: v_add_f32_e32 v17, v74, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v20, v85 |
| ; GCN-NEXT: v_fma_f32 v2, s4, v2, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v22, v64 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v21, v88 |
| ; GCN-NEXT: v_exp_f32_e32 v64, v65 |
| ; GCN-NEXT: v_mul_f32_e32 v23, 0x3fb8aa3b, v23 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[24:25], v[18:19], v[32:47] |
| ; GCN-NEXT: v_add_f32_e32 v17, v75, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v18, v30 |
| ; GCN-NEXT: v_fma_f32 v24, s4, v3, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v23, v23 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v19, v31 |
| ; GCN-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0 |
| ; GCN-NEXT: v_mul_f32_e32 v65, 0x3fb8aa3b, v1 |
| ; GCN-NEXT: v_pack_b32_f16 v0, v20, v21 |
| ; GCN-NEXT: v_pack_b32_f16 v1, v18, v19 |
| ; GCN-NEXT: v_fma_f32 v6, s4, v6, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v25, v67 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[26:27], v[0:1], v[48:63] |
| ; GCN-NEXT: v_add_f32_e32 v17, v80, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v18, v22 |
| ; GCN-NEXT: v_fma_f32 v26, s4, v4, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v27, v3 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v4, v64 |
| ; GCN-NEXT: v_fma_f32 v67, s4, v5, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v65, v65 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[68:69], v[0:1], v[32:47] |
| ; GCN-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 |
| ; GCN-NEXT: v_add_f32_e32 v17, v81, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v5, v23 |
| ; GCN-NEXT: v_fma_f32 v7, s4, v7, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v68, v2 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v19, v25 |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mul_f32_e32 v24, 0x3fb8aa3b, v24 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: ds_read_b128 v[0:3], v84 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_pack_b32_f16 v4, v18, v4 |
| ; GCN-NEXT: v_pack_b32_f16 v5, v5, v19 |
| ; GCN-NEXT: v_exp_f32_e32 v24, v24 |
| ; GCN-NEXT: ds_read_b128 v[18:21], v84 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mul_f32_e32 v26, 0x3fb8aa3b, v26 |
| ; GCN-NEXT: v_mul_f32_e32 v67, 0x3fb8aa3b, v67 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[28:29], v[4:5], v[48:63] |
| ; GCN-NEXT: v_add_f32_e32 v17, v82, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v28, v27 |
| ; GCN-NEXT: v_exp_f32_e32 v26, v26 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v29, v65 |
| ; GCN-NEXT: v_fma_f32 v10, s4, v10, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v67, v67 |
| ; GCN-NEXT: v_mul_f32_e32 v6, 0x3fb8aa3b, v6 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[70:71], v[4:5], v[32:47] |
| ; GCN-NEXT: v_add_f32_e32 v17, v83, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v5, v68 |
| ; GCN-NEXT: v_exp_f32_e32 v6, v6 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v69, v24 |
| ; GCN-NEXT: v_mul_f32_e32 v7, 0x3fb8aa3b, v7 |
| ; GCN-NEXT: v_exp_f32_e32 v7, v7 |
| ; GCN-NEXT: v_pack_b32_f16 v4, v28, v29 |
| ; GCN-NEXT: v_pack_b32_f16 v5, v5, v69 |
| ; GCN-NEXT: ; implicit-def: $sgpr2 |
| ; GCN-NEXT: s_nop 1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[0:1], v[4:5], v[48:63] |
| ; GCN-NEXT: v_add_f32_e32 v0, v85, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v26 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v28, v67 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[18:19], v[4:5], v[32:47] |
| ; GCN-NEXT: v_add_f32_e32 v4, v88, v0 |
| ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v10 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v6 |
| ; GCN-NEXT: v_exp_f32_e32 v10, v0 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v7 |
| ; GCN-NEXT: v_pack_b32_f16 v1, v1, v0 |
| ; GCN-NEXT: v_pack_b32_f16 v0, v17, v28 |
| ; GCN-NEXT: s_nop 1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[2:3], v[0:1], v[48:63] |
| ; GCN-NEXT: v_add_f32_e32 v2, v30, v4 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[20:21], v[0:1], v[32:47] |
| ; GCN-NEXT: v_add_f32_e32 v0, v31, v2 |
| ; GCN-NEXT: v_add_f32_e32 v0, v22, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v64, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v23, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v25, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v27, v0 |
| ; GCN-NEXT: v_fma_f32 v8, s4, v8, -v72 |
| ; GCN-NEXT: v_add_f32_e32 v0, v65, v0 |
| ; GCN-NEXT: v_fma_f32 v9, s4, v9, -v72 |
| ; GCN-NEXT: v_mul_f32_e32 v8, 0x3fb8aa3b, v8 |
| ; GCN-NEXT: v_add_f32_e32 v0, v68, v0 |
| ; GCN-NEXT: v_fma_f32 v11, s4, v11, -v72 |
| ; GCN-NEXT: v_mul_f32_e32 v9, 0x3fb8aa3b, v9 |
| ; GCN-NEXT: v_fma_f32 v12, s4, v12, -v72 |
| ; GCN-NEXT: v_fma_f32 v13, s4, v13, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v8, v8 |
| ; GCN-NEXT: v_add_f32_e32 v0, v24, v0 |
| ; GCN-NEXT: v_fma_f32 v5, s4, v14, -v72 |
| ; GCN-NEXT: v_exp_f32_e32 v9, v9 |
| ; GCN-NEXT: v_add_f32_e32 v0, v26, v0 |
| ; GCN-NEXT: v_add_f32_e32 v0, v67, v0 |
| ; GCN-NEXT: v_fma_f32 v14, s4, v15, -v72 |
| ; GCN-NEXT: v_mul_f32_e32 v11, 0x3fb8aa3b, v11 |
| ; GCN-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v12 |
| ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v5 |
| ; GCN-NEXT: v_add_f32_e32 v0, v6, v0 |
| ; GCN-NEXT: v_exp_f32_e32 v11, v11 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v4, v8 |
| ; GCN-NEXT: v_exp_f32_e32 v12, v3 |
| ; GCN-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v13 |
| ; GCN-NEXT: v_exp_f32_e32 v17, v1 |
| ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v14 |
| ; GCN-NEXT: v_add_f32_e32 v0, v7, v0 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v13, v9 |
| ; GCN-NEXT: v_exp_f32_e32 v15, v3 |
| ; GCN-NEXT: v_exp_f32_e32 v18, v1 |
| ; GCN-NEXT: v_add_f32_e32 v6, v8, v0 |
| ; GCN-NEXT: ds_read_b128 v[0:3], v91 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v5, v10 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v14, v11 |
| ; GCN-NEXT: v_add_f32_e32 v6, v9, v6 |
| ; GCN-NEXT: v_pack_b32_f16 v8, v4, v13 |
| ; GCN-NEXT: v_add_f32_e32 v6, v10, v6 |
| ; GCN-NEXT: v_pack_b32_f16 v9, v5, v14 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v7, v18 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v10, v15 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[0:1], v[8:9], v[48:63] |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v17 |
| ; GCN-NEXT: v_cvt_f16_f32_e32 v4, v12 |
| ; GCN-NEXT: v_add_f32_e32 v6, v11, v6 |
| ; GCN-NEXT: v_add_f32_e32 v6, v12, v6 |
| ; GCN-NEXT: v_add_f32_e32 v1, v15, v6 |
| ; GCN-NEXT: v_add_f32_e32 v11, v17, v1 |
| ; GCN-NEXT: v_pack_b32_f16 v1, v0, v7 |
| ; GCN-NEXT: v_pack_b32_f16 v0, v4, v10 |
| ; GCN-NEXT: ds_read_b128 v[4:7], v91 offset:576 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_inv sc0 sc1 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[4:5], v[8:9], v[32:47] |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: s_waitcnt vmcnt(8) |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: v_mov_b32_e32 v4, 0 |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[6:7], v[0:1], v[32:47] |
| ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[2:3], v[0:1], v[48:63] |
| ; GCN-NEXT: v_add_f32_e32 v2, v18, v11 |
| ; GCN-NEXT: ds_bpermute_b32 v3, v66, v2 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: v_add_f32_e32 v2, v2, v3 |
| ; GCN-NEXT: ds_bpermute_b32 v3, v66, v2 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] |
| ; GCN-NEXT: v_fmac_f32_e32 v2, v4, v16 |
| ; GCN-NEXT: s_endpgm |
; Attribute group #0 is attached to @smallInterleave and sets
; "amdgpu-flat-work-group-size" to "256,256" (presumably a min,max pair,
; i.e. exactly 256 work-items per work-group; confirm against the AMDGPU
; usage documentation).
| attributes #0 = {"amdgpu-flat-work-group-size"="256,256"} |
| |
; Metadata node !0 is the trailing operand of the INLINEASM instructions in the
; MIR body below (presumably the inline-asm source-location cookie; confirm).
| !0 = !{i64 2862105} |
| ... |
| |
| --- |
| name: smallInterleave |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| stackPtrOffsetReg: '$sgpr32' |
| body: | |
| bb.0: |
| liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2, $sgpr3, $sgpr4 |
| %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %1:vgpr_32 = COPY %0:vgpr_32 |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:sreg_32 = IMPLICIT_DEF |
| %4:vreg_64_align2 = IMPLICIT_DEF |
| %5:sgpr_128 = IMPLICIT_DEF |
| %6:vgpr_32 = IMPLICIT_DEF |
| %7:vgpr_32 = IMPLICIT_DEF |
| %8:sgpr_128 = IMPLICIT_DEF |
| %9:vgpr_32 = IMPLICIT_DEF |
| %10:sgpr_512 = IMPLICIT_DEF |
| %11:sgpr_32 = IMPLICIT_DEF |
| %12:sreg_64_xexec = IMPLICIT_DEF |
| %13:vgpr_32 = IMPLICIT_DEF |
| %14:sreg_32 = IMPLICIT_DEF |
| %15:sreg_32 = IMPLICIT_DEF |
| %16:vgpr_32 = IMPLICIT_DEF |
| %17:sreg_32 = IMPLICIT_DEF |
| %18:vgpr_32 = IMPLICIT_DEF |
| %19:vgpr_32 = IMPLICIT_DEF |
| %20:vgpr_32 = IMPLICIT_DEF |
| %21:vgpr_32 = IMPLICIT_DEF |
| %22:vgpr_32 = IMPLICIT_DEF |
| %23:vgpr_32 = IMPLICIT_DEF |
| %24:vgpr_32 = IMPLICIT_DEF |
| %25:vgpr_32 = IMPLICIT_DEF |
| %26:sreg_32 = IMPLICIT_DEF |
| %42:vgpr_32 = IMPLICIT_DEF |
| %44:vreg_128_align2 = IMPLICIT_DEF |
| %48:vgpr_32 = IMPLICIT_DEF |
| %49:vreg_128_align2 = IMPLICIT_DEF |
| %52:vreg_128_align2 = IMPLICIT_DEF |
| %55:vreg_128_align2 = IMPLICIT_DEF |
| %106:vgpr_32 = IMPLICIT_DEF |
| %29:vgpr_32 = IMPLICIT_DEF |
| %37:vgpr_32 = IMPLICIT_DEF |
| %259:vreg_512_align2 = IMPLICIT_DEF |
| %260:vreg_512_align2 = IMPLICIT_DEF |
| IGLP_OPT 2 |
| %27:sreg_32_xm0 = V_READFIRSTLANE_B32 %2:vgpr_32, implicit $exec |
| %28:vgpr_32 = V_LSHL_ADD_U32_e64 %27:sreg_32_xm0, 4, %29:vgpr_32, implicit $exec |
| %30:vreg_64_align2, dead %31:sreg_64 = V_MAD_U64_U32_e64 %3:sreg_32, %28:vgpr_32, %4:vreg_64_align2, 0, implicit $exec |
| %32:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %30.sub0:vreg_64_align2, %5:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %33:sreg_32 = S_LSHL_B32 %27:sreg_32_xm0, 7, implicit-def dead $scc |
| %34:vgpr_32 = V_ADD_LSHL_U32_e64 %6:vgpr_32, %33:sreg_32, 1, implicit $exec |
| DS_WRITE_B128_gfx9 %34:vgpr_32, %32:vreg_128_align2, 0, 0, implicit $exec |
| %35:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %30.sub0:vreg_64_align2, %5:sgpr_128, 0, 64, 0, 0, implicit $exec |
| %36:vgpr_32 = V_ADD_U32_e32 %7:vgpr_32, %37:vgpr_32, implicit $exec |
| %38:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %36:vgpr_32, %8:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %39:vgpr_32 = V_ADD_U32_e32 %9:vgpr_32, %37:vgpr_32, implicit $exec |
| %40:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %39:vgpr_32, %8:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %41:vreg_128_align2 = DS_READ_B128_gfx9 %42:vgpr_32, 0, 0, implicit $exec |
| early-clobber %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %41.sub0_sub1:vreg_128_align2, %44.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %41.sub2_sub3:vreg_128_align2, %44.sub2_sub3:vreg_128_align2, %43:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %45:vreg_128_align2 = DS_READ_B128_gfx9 %42:vgpr_32, 512, 0, implicit $exec |
| early-clobber %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %45.sub0_sub1:vreg_128_align2, %44.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %45.sub2_sub3:vreg_128_align2, %44.sub2_sub3:vreg_128_align2, %46:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %47:vreg_128_align2 = DS_READ_B128_gfx9 %48:vgpr_32, 0, 0, implicit $exec |
| %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %47.sub0_sub1:vreg_128_align2, %49.sub0_sub1:vreg_128_align2, %43:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %47.sub2_sub3:vreg_128_align2, %49.sub2_sub3:vreg_128_align2, %43:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %50:vreg_128_align2 = DS_READ_B128_gfx9 %48:vgpr_32, 512, 0, implicit $exec |
| %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %50.sub0_sub1:vreg_128_align2, %49.sub0_sub1:vreg_128_align2, %46:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %50.sub2_sub3:vreg_128_align2, %49.sub2_sub3:vreg_128_align2, %46:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| DS_WRITE_B128_gfx9 %34:vgpr_32, %35:vreg_128_align2, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %51:vreg_128_align2 = DS_READ_B128_gfx9 %42:vgpr_32, 0, 0, implicit $exec |
| %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %51.sub0_sub1:vreg_128_align2, %52.sub0_sub1:vreg_128_align2, %43:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %51.sub2_sub3:vreg_128_align2, %52.sub2_sub3:vreg_128_align2, %43:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %53:vreg_128_align2 = DS_READ_B128_gfx9 %42:vgpr_32, 512, 0, implicit $exec |
| %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %53.sub0_sub1:vreg_128_align2, %52.sub0_sub1:vreg_128_align2, %46:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %53.sub2_sub3:vreg_128_align2, %52.sub2_sub3:vreg_128_align2, %46:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %54:vreg_128_align2 = DS_READ_B128_gfx9 %48:vgpr_32, 0, 0, implicit $exec |
| %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %54.sub0_sub1:vreg_128_align2, %55.sub0_sub1:vreg_128_align2, %43:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %43:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %54.sub2_sub3:vreg_128_align2, %55.sub2_sub3:vreg_128_align2, %43:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %56:vreg_128_align2 = DS_READ_B128_gfx9 %48:vgpr_32, 512, 0, implicit $exec |
| %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %56.sub0_sub1:vreg_128_align2, %55.sub0_sub1:vreg_128_align2, %46:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %46:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %56.sub2_sub3:vreg_128_align2, %55.sub2_sub3:vreg_128_align2, %46:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %57:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %58:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %59:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %60:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %61:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %62:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %63:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %64:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %65:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %66:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %67:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %68:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %69:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %70:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %71:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %72:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %43.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %73:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub0:vreg_512_align2, implicit $mode, implicit $exec |
| %74:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub1:vreg_512_align2, implicit $mode, implicit $exec |
| %75:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub2:vreg_512_align2, implicit $mode, implicit $exec |
| %76:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub3:vreg_512_align2, implicit $mode, implicit $exec |
| %77:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub4:vreg_512_align2, implicit $mode, implicit $exec |
| %78:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub5:vreg_512_align2, implicit $mode, implicit $exec |
| %79:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub6:vreg_512_align2, implicit $mode, implicit $exec |
| %80:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub7:vreg_512_align2, implicit $mode, implicit $exec |
| %81:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub8:vreg_512_align2, implicit $mode, implicit $exec |
| %82:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub9:vreg_512_align2, implicit $mode, implicit $exec |
| %83:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub10:vreg_512_align2, implicit $mode, implicit $exec |
| %84:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub11:vreg_512_align2, implicit $mode, implicit $exec |
| %85:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub12:vreg_512_align2, implicit $mode, implicit $exec |
| %86:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub13:vreg_512_align2, implicit $mode, implicit $exec |
| %87:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub14:vreg_512_align2, implicit $mode, implicit $exec |
| %88:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %10.sub4:sgpr_512, %46.sub15:vreg_512_align2, implicit $mode, implicit $exec |
| %89:vgpr_32 = V_MAX3_F32_e64 0, %57:vgpr_32, 0, %11:sgpr_32, 0, %58:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %90:vgpr_32 = V_MAX3_F32_e64 0, %89:vgpr_32, 0, %59:vgpr_32, 0, %60:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %91:vgpr_32 = V_MAX3_F32_e64 0, %90:vgpr_32, 0, %61:vgpr_32, 0, %62:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %92:vgpr_32 = V_MAX3_F32_e64 0, %91:vgpr_32, 0, %63:vgpr_32, 0, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %93:vgpr_32 = V_MAX3_F32_e64 0, %92:vgpr_32, 0, %65:vgpr_32, 0, %66:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %94:vgpr_32 = V_MAX3_F32_e64 0, %93:vgpr_32, 0, %67:vgpr_32, 0, %68:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %95:vgpr_32 = V_MAX3_F32_e64 0, %94:vgpr_32, 0, %69:vgpr_32, 0, %70:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %96:vgpr_32 = V_MAX3_F32_e64 0, %95:vgpr_32, 0, %71:vgpr_32, 0, %72:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %97:vgpr_32 = V_MAX3_F32_e64 0, %96:vgpr_32, 0, %73:vgpr_32, 0, %74:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %98:vgpr_32 = V_MAX3_F32_e64 0, %97:vgpr_32, 0, %75:vgpr_32, 0, %76:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %99:vgpr_32 = V_MAX3_F32_e64 0, %98:vgpr_32, 0, %77:vgpr_32, 0, %78:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %100:vgpr_32 = V_MAX3_F32_e64 0, %99:vgpr_32, 0, %79:vgpr_32, 0, %80:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %101:vgpr_32 = V_MAX3_F32_e64 0, %100:vgpr_32, 0, %81:vgpr_32, 0, %82:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %102:vgpr_32 = V_MAX3_F32_e64 0, %101:vgpr_32, 0, %83:vgpr_32, 0, %84:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %103:vgpr_32 = V_MAX3_F32_e64 0, %102:vgpr_32, 0, %85:vgpr_32, 0, %86:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %104:vgpr_32 = V_MAX3_F32_e64 0, %103:vgpr_32, 0, %87:vgpr_32, 0, %88:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %105:vgpr_32 = DS_BPERMUTE_B32 %106:vgpr_32, %104:vgpr_32, 0, implicit $exec |
| %107:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %105:vgpr_32, %105:vgpr_32, implicit $mode, implicit $exec |
| %108:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %104:vgpr_32, %107:vgpr_32, implicit $mode, implicit $exec |
| %109:vgpr_32 = DS_BPERMUTE_B32 %106:vgpr_32, %108:vgpr_32, 0, implicit $exec |
| %110:vgpr_32 = V_CNDMASK_B32_e64 0, %109:vgpr_32, 0, %108:vgpr_32, %12:sreg_64_xexec, implicit $exec |
| %111:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %110:vgpr_32, %110:vgpr_32, implicit $mode, implicit $exec |
| %112:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %13:vgpr_32, %13:vgpr_32, implicit $mode, implicit $exec |
| %113:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %112:vgpr_32, %111:vgpr_32, implicit $mode, implicit $exec |
| %114:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub0:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %115:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %114:vgpr_32, implicit $mode, implicit $exec |
| %116:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %115:vgpr_32, implicit $mode, implicit $exec |
| %117:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub1:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %118:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %117:vgpr_32, implicit $mode, implicit $exec |
| %119:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %118:vgpr_32, implicit $mode, implicit $exec |
| %120:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub2:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %121:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %120:vgpr_32, implicit $mode, implicit $exec |
| %122:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %121:vgpr_32, implicit $mode, implicit $exec |
| %123:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub3:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %124:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %123:vgpr_32, implicit $mode, implicit $exec |
| %125:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %124:vgpr_32, implicit $mode, implicit $exec |
| %126:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub4:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %127:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %126:vgpr_32, implicit $mode, implicit $exec |
| %128:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %127:vgpr_32, implicit $mode, implicit $exec |
| %129:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub5:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %130:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %129:vgpr_32, implicit $mode, implicit $exec |
| %131:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %130:vgpr_32, implicit $mode, implicit $exec |
| %132:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub6:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %133:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %132:vgpr_32, implicit $mode, implicit $exec |
| %134:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %133:vgpr_32, implicit $mode, implicit $exec |
| %135:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub7:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %136:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %135:vgpr_32, implicit $mode, implicit $exec |
| %137:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %136:vgpr_32, implicit $mode, implicit $exec |
| %138:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub8:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %139:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %138:vgpr_32, implicit $mode, implicit $exec |
| %140:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %139:vgpr_32, implicit $mode, implicit $exec |
| %141:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub9:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %142:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %141:vgpr_32, implicit $mode, implicit $exec |
| %143:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %142:vgpr_32, implicit $mode, implicit $exec |
| %144:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub10:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %145:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %144:vgpr_32, implicit $mode, implicit $exec |
| %146:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %145:vgpr_32, implicit $mode, implicit $exec |
| %147:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub11:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %148:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %147:vgpr_32, implicit $mode, implicit $exec |
| %149:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %148:vgpr_32, implicit $mode, implicit $exec |
| %150:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub12:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %151:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %150:vgpr_32, implicit $mode, implicit $exec |
| %152:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %151:vgpr_32, implicit $mode, implicit $exec |
| %153:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub13:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %154:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %153:vgpr_32, implicit $mode, implicit $exec |
| %155:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %154:vgpr_32, implicit $mode, implicit $exec |
| %156:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub14:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %157:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %156:vgpr_32, implicit $mode, implicit $exec |
| %158:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %157:vgpr_32, implicit $mode, implicit $exec |
| %159:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %43.sub15:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %160:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %159:vgpr_32, implicit $mode, implicit $exec |
| %161:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %160:vgpr_32, implicit $mode, implicit $exec |
| %162:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub0:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %163:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %162:vgpr_32, implicit $mode, implicit $exec |
| %164:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %163:vgpr_32, implicit $mode, implicit $exec |
| %165:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub1:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %166:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %165:vgpr_32, implicit $mode, implicit $exec |
| %167:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %166:vgpr_32, implicit $mode, implicit $exec |
| %168:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub2:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %169:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %168:vgpr_32, implicit $mode, implicit $exec |
| %170:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %169:vgpr_32, implicit $mode, implicit $exec |
| %171:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub3:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %172:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %171:vgpr_32, implicit $mode, implicit $exec |
| %173:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %172:vgpr_32, implicit $mode, implicit $exec |
| %174:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub4:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %175:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %174:vgpr_32, implicit $mode, implicit $exec |
| %176:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %175:vgpr_32, implicit $mode, implicit $exec |
| %177:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub5:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %178:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %177:vgpr_32, implicit $mode, implicit $exec |
| %179:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %178:vgpr_32, implicit $mode, implicit $exec |
| %180:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub6:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %181:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %180:vgpr_32, implicit $mode, implicit $exec |
| %182:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %181:vgpr_32, implicit $mode, implicit $exec |
| %183:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub7:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %184:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %183:vgpr_32, implicit $mode, implicit $exec |
| %185:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %184:vgpr_32, implicit $mode, implicit $exec |
| %186:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub8:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %187:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %186:vgpr_32, implicit $mode, implicit $exec |
| %188:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %187:vgpr_32, implicit $mode, implicit $exec |
| %189:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub9:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %190:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %189:vgpr_32, implicit $mode, implicit $exec |
| %191:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %190:vgpr_32, implicit $mode, implicit $exec |
| %192:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub10:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %193:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %192:vgpr_32, implicit $mode, implicit $exec |
| %194:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %193:vgpr_32, implicit $mode, implicit $exec |
| %195:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub11:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %196:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %195:vgpr_32, implicit $mode, implicit $exec |
| %197:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %196:vgpr_32, implicit $mode, implicit $exec |
| %198:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub12:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %199:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %198:vgpr_32, implicit $mode, implicit $exec |
| %200:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %199:vgpr_32, implicit $mode, implicit $exec |
| %201:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub13:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %202:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %201:vgpr_32, implicit $mode, implicit $exec |
| %203:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %202:vgpr_32, implicit $mode, implicit $exec |
| %204:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub14:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %205:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %204:vgpr_32, implicit $mode, implicit $exec |
| %206:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %205:vgpr_32, implicit $mode, implicit $exec |
| %207:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %10.sub4:sgpr_512, 0, %46.sub15:vreg_512_align2, 1, %113:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %208:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %207:vgpr_32, implicit $mode, implicit $exec |
| %209:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %208:vgpr_32, implicit $mode, implicit $exec |
| %210:vgpr_32 = contract nofpexcept V_ADD_F32_e32 0, %116:vgpr_32, implicit $mode, implicit $exec |
| %211:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %119:vgpr_32, %210:vgpr_32, implicit $mode, implicit $exec |
| %212:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %122:vgpr_32, %211:vgpr_32, implicit $mode, implicit $exec |
| %213:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %125:vgpr_32, %212:vgpr_32, implicit $mode, implicit $exec |
| %214:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %128:vgpr_32, %213:vgpr_32, implicit $mode, implicit $exec |
| %215:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %131:vgpr_32, %214:vgpr_32, implicit $mode, implicit $exec |
| %216:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %134:vgpr_32, %215:vgpr_32, implicit $mode, implicit $exec |
| %217:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %137:vgpr_32, %216:vgpr_32, implicit $mode, implicit $exec |
| %218:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %140:vgpr_32, %217:vgpr_32, implicit $mode, implicit $exec |
| %219:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %143:vgpr_32, %218:vgpr_32, implicit $mode, implicit $exec |
| %220:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %146:vgpr_32, %219:vgpr_32, implicit $mode, implicit $exec |
| %221:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %149:vgpr_32, %220:vgpr_32, implicit $mode, implicit $exec |
| %222:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %152:vgpr_32, %221:vgpr_32, implicit $mode, implicit $exec |
| %223:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %155:vgpr_32, %222:vgpr_32, implicit $mode, implicit $exec |
| %224:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %158:vgpr_32, %223:vgpr_32, implicit $mode, implicit $exec |
| %225:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %161:vgpr_32, %224:vgpr_32, implicit $mode, implicit $exec |
| %226:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %164:vgpr_32, %225:vgpr_32, implicit $mode, implicit $exec |
| %227:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %167:vgpr_32, %226:vgpr_32, implicit $mode, implicit $exec |
| %228:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %170:vgpr_32, %227:vgpr_32, implicit $mode, implicit $exec |
| %229:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %173:vgpr_32, %228:vgpr_32, implicit $mode, implicit $exec |
| %230:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %176:vgpr_32, %229:vgpr_32, implicit $mode, implicit $exec |
| %231:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %179:vgpr_32, %230:vgpr_32, implicit $mode, implicit $exec |
| %232:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %182:vgpr_32, %231:vgpr_32, implicit $mode, implicit $exec |
| %233:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %185:vgpr_32, %232:vgpr_32, implicit $mode, implicit $exec |
| %234:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %188:vgpr_32, %233:vgpr_32, implicit $mode, implicit $exec |
| %235:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %191:vgpr_32, %234:vgpr_32, implicit $mode, implicit $exec |
| %236:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %194:vgpr_32, %235:vgpr_32, implicit $mode, implicit $exec |
| %237:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %197:vgpr_32, %236:vgpr_32, implicit $mode, implicit $exec |
| %238:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %200:vgpr_32, %237:vgpr_32, implicit $mode, implicit $exec |
| %239:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %203:vgpr_32, %238:vgpr_32, implicit $mode, implicit $exec |
| %240:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %206:vgpr_32, %239:vgpr_32, implicit $mode, implicit $exec |
| %241:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %209:vgpr_32, %240:vgpr_32, implicit $mode, implicit $exec |
| %242:vgpr_32 = DS_BPERMUTE_B32 %106:vgpr_32, %241:vgpr_32, 0, implicit $exec |
| %243:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %241:vgpr_32, %242:vgpr_32, implicit $mode, implicit $exec |
| %244:vgpr_32 = DS_BPERMUTE_B32 %106:vgpr_32, %243:vgpr_32, 0, implicit $exec |
| %0:vgpr_32 = V_CNDMASK_B32_e64 0, %244:vgpr_32, 0, %243:vgpr_32, %12:sreg_64_xexec, implicit $exec |
| %245:vgpr_32 = contract nofpexcept V_SUB_F32_e32 %13:vgpr_32, %113:vgpr_32, implicit $mode, implicit $exec |
| %246:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %245:vgpr_32, implicit $mode, implicit $exec |
| undef %247.sub0:vreg_64_align2 = afn nofpexcept V_EXP_F32_e32 %246:vgpr_32, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %248:vgpr_32 = V_PERM_B32_e64 %40.sub0:vreg_64_align2, %38.sub0:vreg_64_align2, %14:sreg_32, implicit $exec |
| %249:vgpr_32 = V_PERM_B32_e64 %40.sub0:vreg_64_align2, %38.sub0:vreg_64_align2, %15:sreg_32, implicit $exec |
| %250:vgpr_32 = V_PERM_B32_e64 %40.sub1:vreg_64_align2, %38.sub1:vreg_64_align2, %14:sreg_32, implicit $exec |
| %251:vgpr_32 = V_PERM_B32_e64 %40.sub1:vreg_64_align2, %38.sub1:vreg_64_align2, %15:sreg_32, implicit $exec |
| %252:vgpr_32 = V_ADD_U32_e32 %27:sreg_32_xm0, %16:vgpr_32, implicit $exec |
| %253:vgpr_32 = V_AND_B32_e32 536870911, %252:vgpr_32, implicit $exec |
| %254:vgpr_32 = nsw V_MUL_LO_U32_e64 %253:vgpr_32, %17:sreg_32, implicit $exec |
| %255:vgpr_32 = V_ADD_LSHL_U32_e64 %18:vgpr_32, %254:vgpr_32, 1, implicit $exec |
| DS_WRITE_B32_gfx9 %255:vgpr_32, %248:vgpr_32, 0, 0, implicit $exec |
| %256:vgpr_32 = V_LSHL_ADD_U32_e64 %19:vgpr_32, 1, %255:vgpr_32, implicit $exec |
| DS_WRITE_B32_gfx9 %256:vgpr_32, %249:vgpr_32, 0, 0, implicit $exec |
| %257:vgpr_32 = V_LSHL_ADD_U32_e64 %20:vgpr_32, 1, %256:vgpr_32, implicit $exec |
| DS_WRITE_B32_gfx9 %257:vgpr_32, %250:vgpr_32, 0, 0, implicit $exec |
| %258:vgpr_32 = V_LSHL_ADD_U32_e64 %21:vgpr_32, 1, %257:vgpr_32, implicit $exec |
| DS_WRITE_B32_gfx9 %258:vgpr_32, %251:vgpr_32, 0, 0, implicit $exec |
| %0:vgpr_32 = contract nofpexcept V_FMAC_F32_e32 %1:vgpr_32, %247.sub0:vreg_64_align2, %0:vgpr_32, implicit $mode, implicit $exec |
| %259.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub0_sub1:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %259.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub2_sub3:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %259.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub4_sub5:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %259.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub6_sub7:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %259.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub8_sub9:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %259.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub10_sub11:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %259.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub12_sub13:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %259.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %259.sub14_sub15:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub0_sub1:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub2_sub3:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub4_sub5:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub6_sub7:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub8_sub9:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub10_sub11:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub12_sub13:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %260.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %260.sub14_sub15:vreg_512_align2, 0, %247:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %261:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %116:vgpr_32, implicit $mode, implicit $exec |
| %262:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %119:vgpr_32, implicit $mode, implicit $exec |
| %263:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %122:vgpr_32, implicit $mode, implicit $exec |
| %264:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %128:vgpr_32, implicit $mode, implicit $exec |
| %265:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %131:vgpr_32, implicit $mode, implicit $exec |
| %266:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %134:vgpr_32, implicit $mode, implicit $exec |
| %267:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %140:vgpr_32, implicit $mode, implicit $exec |
| %268:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %143:vgpr_32, implicit $mode, implicit $exec |
| %269:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %146:vgpr_32, implicit $mode, implicit $exec |
| %270:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %152:vgpr_32, implicit $mode, implicit $exec |
| %271:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %155:vgpr_32, implicit $mode, implicit $exec |
| %272:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %158:vgpr_32, implicit $mode, implicit $exec |
| %273:vgpr_32 = V_ADD_U32_e32 %22:vgpr_32, %37:vgpr_32, implicit $exec |
| %274:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %273:vgpr_32, %8:sgpr_128, 0, 0, 0, 0, implicit $exec |
| %275:vgpr_32 = V_ADD_U32_e32 %23:vgpr_32, %37:vgpr_32, implicit $exec |
| %276:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %275:vgpr_32, %8:sgpr_128, 0, 0, 0, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %277:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %278:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 576, 0, implicit $exec |
| %279:vreg_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 0, 0, implicit $exec |
| %280:vreg_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 576, 0, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %281:vgpr_32 = V_PERM_B32_e64 %276.sub0:vreg_64_align2, %274.sub0:vreg_64_align2, %14:sreg_32, implicit $exec |
| %282:vgpr_32 = V_PERM_B32_e64 %276.sub0:vreg_64_align2, %274.sub0:vreg_64_align2, %15:sreg_32, implicit $exec |
| %283:vgpr_32 = V_PERM_B32_e64 %276.sub1:vreg_64_align2, %274.sub1:vreg_64_align2, %14:sreg_32, implicit $exec |
| %284:vgpr_32 = V_PERM_B32_e64 %276.sub1:vreg_64_align2, %274.sub1:vreg_64_align2, %15:sreg_32, implicit $exec |
| DS_WRITE_B32_gfx9 %255:vgpr_32, %281:vgpr_32, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %256:vgpr_32, %282:vgpr_32, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %257:vgpr_32, %283:vgpr_32, 0, 0, implicit $exec |
| DS_WRITE_B32_gfx9 %258:vgpr_32, %284:vgpr_32, 0, 0, implicit $exec |
| %285:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %125:vgpr_32, implicit $mode, implicit $exec |
| %286:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %137:vgpr_32, implicit $mode, implicit $exec |
| %287:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %149:vgpr_32, implicit $mode, implicit $exec |
| %288:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %161:vgpr_32, implicit $mode, implicit $exec |
| undef %289.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %263:vgpr_32, 0, %285:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %289.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %261:vgpr_32, 0, %262:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %290.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %266:vgpr_32, 0, %286:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %290.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %264:vgpr_32, 0, %265:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %291.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %269:vgpr_32, 0, %287:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %291.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %267:vgpr_32, 0, %268:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %292.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %272:vgpr_32, 0, %288:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %292.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %270:vgpr_32, 0, %271:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %277.sub0_sub1:vreg_128_align2, %289:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %277.sub2_sub3:vreg_128_align2, %290:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %278.sub0_sub1:vreg_128_align2, %289:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %278.sub2_sub3:vreg_128_align2, %290:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %279.sub0_sub1:vreg_128_align2, %291:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %279.sub2_sub3:vreg_128_align2, %292:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %280.sub0_sub1:vreg_128_align2, %291:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %280.sub2_sub3:vreg_128_align2, %292:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %293:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %209:vgpr_32, implicit $mode, implicit $exec |
| %294:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %203:vgpr_32, implicit $mode, implicit $exec |
| %295:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %197:vgpr_32, implicit $mode, implicit $exec |
| %296:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %191:vgpr_32, implicit $mode, implicit $exec |
| %297:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %185:vgpr_32, implicit $mode, implicit $exec |
| %298:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %179:vgpr_32, implicit $mode, implicit $exec |
| %299:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %173:vgpr_32, implicit $mode, implicit $exec |
| %300:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %167:vgpr_32, implicit $mode, implicit $exec |
| %301:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %206:vgpr_32, implicit $mode, implicit $exec |
| %302:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %200:vgpr_32, implicit $mode, implicit $exec |
| %303:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %194:vgpr_32, implicit $mode, implicit $exec |
| %304:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %188:vgpr_32, implicit $mode, implicit $exec |
| %305:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %182:vgpr_32, implicit $mode, implicit $exec |
| %306:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %176:vgpr_32, implicit $mode, implicit $exec |
| %307:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %170:vgpr_32, implicit $mode, implicit $exec |
| %308:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %164:vgpr_32, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| undef %309.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %307:vgpr_32, 0, %299:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %309.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %308:vgpr_32, 0, %300:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %310.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %305:vgpr_32, 0, %297:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %310.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %306:vgpr_32, 0, %298:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %311.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %303:vgpr_32, 0, %295:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %311.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %304:vgpr_32, 0, %296:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| undef %312.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %301:vgpr_32, 0, %293:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %312.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %302:vgpr_32, 0, %294:vgpr_32, 0, 0, implicit $mode, implicit $exec |
| %313:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %313.sub0_sub1:vreg_128_align2, %309:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %313.sub2_sub3:vreg_128_align2, %310:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %314:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 576, 0, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %314.sub0_sub1:vreg_128_align2, %309:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %314.sub2_sub3:vreg_128_align2, %310:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %315:vreg_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 0, 0, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %315.sub0_sub1:vreg_128_align2, %311:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %259:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %315.sub2_sub3:vreg_128_align2, %312:vreg_64_align2, %259:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %316:vreg_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 576, 0, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %316.sub0_sub1:vreg_128_align2, %311:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| %260:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %316.sub2_sub3:vreg_128_align2, %312:vreg_64_align2, %260:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec |
| INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0 |
| %37:vgpr_32 = V_ADD_U32_e32 %26:sreg_32, %37:vgpr_32, implicit $exec |
| %29:vgpr_32 = nuw V_ADD_U32_e32 64, %29:vgpr_32, implicit $exec |
| S_ENDPGM 0 |
| ... |