| # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=amdgpu-lower-vgpr-encoding -o - %s | FileCheck -check-prefixes=GCN,ASM %s |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=amdgpu-lower-vgpr-encoding -o - %s | llvm-mc -triple=amdgcn -mcpu=gfx1250 -filetype=obj -o - | llvm-objdump -d --mcpu=gfx1250 - | FileCheck -check-prefixes=GCN,DIS %s |
| |
| # high_vgprs: exercises s_set_vgpr_msb insertion for VGPRs numbered 256 and |
| # above across VOP1/VOP2/VOP3, DPP, DS, FLAT, VBUFFER and WMMA instructions. |
| # As the expected values below show, the low byte of the s_set_vgpr_msb |
| # operand packs the new MSBs (src0 in bits 1:0, src1 in bits 3:2, src2 in |
| # bits 5:4, dst in bits 7:6) and the high byte repeats the previous low byte. |
| # ASM-LABEL: {{^}}high_vgprs: |
| # DIS-LABEL: <high_vgprs>: |
| --- |
| name: high_vgprs |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; VOP1 |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x41 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v255 /*v511*/ |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr511, implicit $exec |
| |
| ; No mask change |
| ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v254 /*v510*/ |
| $vgpr257 = V_MOV_B32_e32 undef $vgpr510, implicit $exec |
| |
| ; Single bit change |
| ; GCN-NEXT: s_set_vgpr_msb 0x4101 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_rcp_f32_e64 v255, v2 /*v258*/ |
| $vgpr255 = V_RCP_F32_e64 0, undef $vgpr258, 0, 0, implicit $exec, implicit $mode |
| |
| ; Reset |
| ; GCN-NEXT: s_set_vgpr_msb 0x100 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_rcp_f32_e64 v255, v1 |
| $vgpr255 = V_RCP_F32_e64 0, undef $vgpr1, 0, 0, implicit $exec, implicit $mode |
| |
| ; VOP2 |
| |
| ; GCN-NEXT: s_set_vgpr_msb 5 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/ |
| $vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x544 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GCN-NEXT: v_add_f32_e64 v2 /*v258*/, v0, v251 /*v507*/ |
| $vgpr258 = V_ADD_F32_e64 0, $vgpr0, 0, undef $vgpr507, 0, 0, implicit $exec, implicit $mode |
| |
| ; VOP3 |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4455 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1 |
| ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/ |
| $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode |
| |
| ; No change |
| ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/ |
| $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode |
| |
| ; Tuple crossing the 256 boundary |
| ; GCN-NEXT: s_set_vgpr_msb 0x5511 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1 |
| ; GCN-NEXT: v_mqsad_u32_u8 v[254:257], v[2:3] /*v[258:259]*/, v0, v[244:247] /*v[500:503]*/ |
| $vgpr254_vgpr255_vgpr256_vgpr257 = V_MQSAD_U32_U8_e64 $vgpr258_vgpr259, $vgpr0, undef $vgpr500_vgpr501_vgpr502_vgpr503, 0, implicit $exec |
| |
| ; DPP/tied operand |
| ; GCN-NEXT: s_set_vgpr_msb 0x1145 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GCN-NEXT: v_add_nc_u16_e64_dpp v0 /*v256*/, v1 /*v257*/, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 |
| $vgpr256 = V_ADD_NC_U16_fake16_e64_dpp $vgpr256, 0, $vgpr257, 0, undef $vgpr258, 0, 0, 1, 15, 15, 1, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4511 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1 |
| ; GCN-NEXT: v_add3_u32_e64_dpp v0, v1 /*v257*/, v0, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 |
| $vgpr0 = V_ADD3_U32_e64_dpp $vgpr0, $vgpr257, $vgpr0, undef $vgpr258, 1, 15, 15, 1, implicit $exec |
| |
| ; DS (addr, data0, and data1 operands) |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x1114 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=1 |
| ; GCN-NEXT: ds_store_2addr_b32 v0, v248 /*v504*/, v249 /*v505*/ offset1:1 |
| DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr504, undef $vgpr505, 0, 1, 0, implicit $exec |
| |
| ; Reset |
| ; GCN-NEXT: s_set_vgpr_msb 0x1400 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: ds_store_2addr_b32 v0, v248, v249 offset1:1 |
| DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr248, undef $vgpr249, 0, 1, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 1 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: ds_load_b32 v0, v255 /*v511*/ |
| $vgpr0 = DS_READ_B32_gfx9 $vgpr511, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x144 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: ds_add_rtn_u32 v255 /*v511*/, v0, v248 /*v504*/ |
| $vgpr511 = DS_ADD_RTN_U32_gfx9 $vgpr0, undef $vgpr504, 0, 0, implicit $exec |
| |
| ; Reset |
| ; GCN-NEXT: s_set_vgpr_msb 0x4400 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: ds_add_rtn_u32 v0, v0, v0 |
| $vgpr0 = DS_ADD_RTN_U32_gfx9 $vgpr0, $vgpr0, 0, 0, implicit $exec |
| |
| ; FLAT (vaddr, vdata and vdst operands) |
| |
| ; GCN-NEXT: s_set_vgpr_msb 1 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: global_load_b32 v2, v[2:3] /*v[258:259]*/, off |
| $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr258_vgpr259, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x140 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: global_load_b32 v255 /*v511*/, v0, s[0:1] |
| $vgpr511 = GLOBAL_LOAD_DWORD_SADDR undef $sgpr0_sgpr1, $vgpr0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4001 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: scratch_load_u8 v0, v255 /*v511*/, s0 |
| $vgpr0 = SCRATCH_LOAD_UBYTE_SVS $vgpr511, undef $sgpr0, 0, 0, implicit $exec, implicit $flat_scr |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x100 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: global_store_b32 v[0:1], v2, off |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 5 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: global_store_b32 v[0:1] /*v[256:257]*/, v255 /*v511*/, off |
| GLOBAL_STORE_DWORD $vgpr256_vgpr257, $vgpr511, 0, 0, implicit $exec |
| |
| ; No change |
| ; GCN-NEXT: global_store_b96 v[0:1] /*v[256:257]*/, v[244:246] /*v[500:502]*/, off |
| GLOBAL_STORE_DWORDX3 $vgpr256_vgpr257, $vgpr500_vgpr501_vgpr502, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x544 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: flat_atomic_add_u32 v254 /*v510*/, v[0:1], v255 /*v511*/ th:TH_ATOMIC_RETURN |
| $vgpr510 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr511, 0, 1, implicit $exec, implicit $flat_scr |
| |
| ; Reset |
| ; GCN-NEXT: s_set_vgpr_msb 0x4400 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: flat_atomic_add_u32 v0, v[0:1], v255 th:TH_ATOMIC_RETURN |
| $vgpr0 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr255, 0, 1, implicit $exec, implicit $flat_scr |
| |
| ; VBUFFER (vdata, vaddr operands) |
| |
| ; The preceding reset left the mode at zero, so this low-VGPR load needs no s_set_vgpr_msb. |
| ; GCN-NEXT: buffer_load_b32 v1, v0, s[8:11], s3 offen |
| $vgpr1 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0, s[8:11], s3 offen |
| $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4041 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0 /*v256*/, s[8:11], s3 offen |
| $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr256, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4100 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: buffer_store_b32 v0, v1, s[0:3], s3 offen |
| BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x41 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: buffer_store_b32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen |
| BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr256, $vgpr257, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4100 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s3 offen |
| BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x41 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: buffer_atomic_add_f32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen |
| BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr256, $vgpr257, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec |
| |
| ; VGPRs above 512 |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x41aa |
| ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=2 |
| ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/ |
| $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xaaab |
| ; ASM-SAME: ; msbs: dst=2 src0=3 src1=2 src2=2 |
| ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/ |
| $vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xabae |
| ; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2 |
| ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/ |
| $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xaeba |
| ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=3 |
| ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/ |
| $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xbaea |
| ; ASM-SAME: ; msbs: dst=3 src0=2 src1=2 src2=2 |
| ; GCN-NEXT: v_fma_f32 v255 /*v1023*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/ |
| $vgpr1023 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xeaff |
| ; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=3 |
| ; GCN-NEXT: v_fma_f32 v0 /*v768*/, v1 /*v769*/, v2 /*v770*/, v3 /*v771*/ |
| $vgpr768 = V_FMA_F32_e64 0, undef $vgpr769, 0, undef $vgpr770, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xff42 |
| ; ASM-SAME: ; msbs: dst=1 src0=2 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/ |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec |
| |
| ; Reset |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4200 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_fma_f32 v0, v1, v2, v3 |
| $vgpr0 = V_FMA_F32_e64 0, undef $vgpr1, 0, undef $vgpr2, 0, undef $vgpr3, 0, 0, implicit $exec, implicit $mode |
| |
| ; Tuples |
| |
| ; GCN-NEXT: s_set_vgpr_msb 10 |
| ; ASM-SAME: ; msbs: dst=0 src0=2 src1=2 src2=0 |
| ; GCN-NEXT: global_store_b96 v[0:1] /*v[512:513]*/, v[0:2] /*v[512:514]*/, off |
| GLOBAL_STORE_DWORDX3 $vgpr512_vgpr513, $vgpr512_vgpr513_vgpr514, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xa0b |
| ; ASM-SAME: ; msbs: dst=0 src0=3 src1=2 src2=0 |
| ; GCN-NEXT: global_store_b64 v[254:255] /*v[1022:1023]*/, v[254:255] /*v[766:767]*/, off |
| GLOBAL_STORE_DWORDX2 $vgpr1022_vgpr1023, $vgpr766_vgpr767, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xb55 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1 |
| ; GCN-NEXT: v_wmma_f32_16x16x32_bf16 v[14:21] /*v[270:277]*/, v[26:33] /*v[282:289]*/, v[34:41] /*v[290:297]*/, v[14:21] /*v[270:277]*/ |
| early-clobber $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, undef $vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287_vgpr288_vgpr289, 8, undef $vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297, 8, killed undef $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277, 0, 0, 0, 0, implicit $exec |
| |
| ; ASM: NumVgprs: 1024 |
| |
| ... |
| |
| # vopd: dual-issue v_dual_* instructions. In the expected output each dual |
| # instruction is preceded by at most one s_set_vgpr_msb, and the operands of |
| # both halves are printed with the same MSB for a given operand slot. |
| # ASM-LABEL: {{^}}vopd: |
| |
| # DIS-LABEL: <vopd>: |
| --- |
| name: vopd |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; All operands are low VGPRs and the function starts in mode zero: no s_set_vgpr_msb expected. |
| ; GCN-NEXT: v_dual_sub_f32 v255, v1, v1 :: v_dual_mul_f32 v6, v0, v0 |
| $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr1, undef $vgpr0, undef $vgpr0, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 /*v256*/, v3, v4 |
| $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4041 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 /*v256*/, v44 /*v300*/, v4 |
| $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4104 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, v0, v1 /*v257*/ |
| $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x401 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, v3 |
| $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef $vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x140 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 v0 /*v256*/, v1, v1 |
| $vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4005 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/ |
| $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x554 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=1 |
| ; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/ |
| $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x5410 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=1 |
| ; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/ |
| $vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x1000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3 |
| $vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, $vgpr3, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_dual_fma_f32 v244 /*v500*/, v6, v7, v8 :: v_dual_add_f32 v3 /*v259*/, v4, v5 |
| $vgpr500, $vgpr259 = V_DUAL_FMA_F32_e64_X_ADD_F32_e32_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr7, 0, undef $vgpr8, 0, undef $vgpr4, 0, undef $vgpr5, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x40ae |
| ; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2 |
| ; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/ |
| $vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0xae54 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=1 |
| ; GCN-NEXT: v_dual_fmac_f32 v7 /*v263*/, v1, v1 /*v257*/ :: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ |
| $vgpr263, $vgpr500 = V_DUAL_FMAC_F32_e32_X_FMAMK_F32_gfx1250 undef $vgpr1, undef $vgpr257, $vgpr263, undef $vgpr0, 10, undef $vgpr300, implicit $mode, implicit $exec |
| |
| ; ASM: NumVgprs: 777 |
| |
| ... |
| |
| # fmaak_fmamk: v_fmaak/v_fmamk with a literal constant operand. In the |
| # expected msbs, the two VGPR sources of v_fmaak are covered by src0/src1, |
| # whereas for v_fmamk the VGPR printed after the literal is covered by src2. |
| # ASM-LABEL: {{^}}fmaak_fmamk: |
| # DIS-LABEL: <fmaak_fmamk>: |
| --- |
| name: fmaak_fmamk |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x45 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2 /*v258*/, 0x1 |
| $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4505 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: v_fmaak_f32 v0, v1 /*v257*/, v2 /*v258*/, 0x1 |
| $vgpr0 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x541 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2, 0x1 |
| $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr2, 1, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4144 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1 |
| $vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4451 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=1 |
| ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/ |
| $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x5111 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1 |
| ; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/ |
| $vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x1141 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 |
| $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4150 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=1 |
| ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/ |
| $vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x5051 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=1 |
| ; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/ |
| $vgpr260_vgpr261 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr258_vgpr259, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x5101 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3] |
| $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr2_vgpr3, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x110 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=1 |
| ; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/ |
| $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr356_vgpr357, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x1040 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5] |
| $vgpr256_vgpr257 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr4_vgpr5, implicit $exec, implicit $mode |
| |
| ; The remaining v_fmamk_f16 cases use only low VGPRs: one reset to zero, then no further mode change. |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_fmamk_f16 v26, v56, 0x1, v58 |
| $vgpr26 = V_FMAMK_F16_fake16 undef $vgpr56, 1, undef $vgpr58, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: v_fmamk_f16 v0, v35, 0x1, v2 |
| $vgpr0 = V_FMAMK_F16_fake16 undef $vgpr35, 1, undef $vgpr2, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: v_fmamk_f16 v0, v2, 0x1, v6 |
| $vgpr0 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr6, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: v_fmamk_f16 v5, v2, 0x1, v4 |
| $vgpr5 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr4, implicit $exec, implicit $mode |
| |
| ; ASM: NumVgprs: 358 |
| |
| ... |
| |
| # fmac: v_fmac with a high-VGPR accumulator and low-VGPR multiplicands, in |
| # both the e64 and e32 encodings. A single s_set_vgpr_msb is expected for the |
| # two instructions because they need the same mode. |
| # ASM-LABEL: {{^}}fmac: |
| # DIS-LABEL: <fmac>: |
| --- |
| name: fmac |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; Accumulation instructions apply DST to both the destination and one of the source VGPRs |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_fmac_f32_e64 v0 /*v256*/, |v0|, |v1| clamp mul:4 |
| $vgpr256 = V_FMAC_F32_e64 2, undef $vgpr0, 2, undef $vgpr1, 2, undef $vgpr256, 1, 2, implicit $mode, implicit $exec |
| |
| ; Same mode as above, so no new s_set_vgpr_msb is expected. |
| ; GCN-NEXT: v_fmac_f32_e32 v1 /*v257*/, v0, v1 |
| $vgpr257 = V_FMAC_F32_e32 undef $vgpr0, undef $vgpr1, undef $vgpr257, implicit $mode, implicit $exec |
| |
| ; ASM: NumVgprs: 258 |
| |
| ... |
| |
| # rev_opcodes: "reversed" opcodes (v_lshlrev, v_subrev). Each opcode is tested |
| # once with the high VGPR as the first source and once as the second source. |
| # ASM-LABEL: {{^}}rev_opcodes: |
| # DIS-LABEL: <rev_opcodes>: |
| --- |
| name: rev_opcodes |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; V_LSHLREV, V_SUBREV: SRC0 and SRC1 apply to the operands in the order in the ISA (before "reversing") |
| ; e.g. v_lshlrev_b32 v0(vdst), v1(src0), v2(src1) // v0 = v2 << v1 |
| ; DST applies to V0, SRC0 applies to V1, and SRC1 applies to V2. |
| |
| ; GCN-NEXT: s_set_vgpr_msb 1 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_lshlrev_b32_e64 v0, v0 /*v256*/, v2 |
| $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr256, undef $vgpr2, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x104 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: v_lshlrev_b32_e64 v0, v1, v0 /*v256*/ |
| $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr1, undef $vgpr256, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x401 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2 |
| $vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x104 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v1, v0 /*v256*/ |
| $vgpr0 = V_SUBREV_U32_e32 undef $vgpr1, undef $vgpr256, implicit $exec |
| |
| ; ASM: NumVgprs: 257 |
| ... |
| |
| # minimal_mode_change: checks how few s_set_vgpr_msb instructions are emitted |
| # for a mixed sequence of low- and high-VGPR instructions. The expected output |
| # shows that an instruction which has no operand in a given slot does not |
| # force that slot's MSB to any particular value. |
| # ASM-LABEL: {{^}}minimal_mode_change: |
| # DIS-LABEL: <minimal_mode_change>: |
| --- |
| name: minimal_mode_change |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x55 |
| ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1 |
| ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/ |
| $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x5500 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v2 |
| $vgpr0 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec |
| |
| ; GCN-NEXT: v_mov_b32_e32 v0, v1 |
| $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v3 /*v259*/, v1 |
| $vgpr259 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| |
| ; GCN-NEXT: v_add_nc_u32_e32 v0 /*v256*/, v1, v2 |
| $vgpr256 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_fma_f32 v3, v4, v5, s2 |
| $vgpr3 = V_FMA_F32_e64 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $sgpr2, 0, 0, implicit $exec, implicit $mode |
| |
| ; GCN-NEXT: s_set_vgpr_msb 1 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_fma_f32 v3, v4 /*v260*/, v5, 1 |
| $vgpr3 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr5, 0, 1, 0, 0, implicit $exec, implicit $mode |
| |
| ; The expected mode below already has src1=1 ahead of the v_mov, which has no |
| ; src1 operand; the v_add that follows uses src1=1 and gets no mode change of its own. |
| ; GCN-NEXT: s_set_vgpr_msb 0x104 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0, v1 |
| $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| |
| ; GCN-NEXT: v_add_nc_u32_e32 v2, v1, v3 /*v259*/ |
| $vgpr2 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr259, implicit $exec |
| |
| ; Here the middle v_add reads a low VGPR through src1, so src1=1 is only set |
| ; by a second s_set_vgpr_msb right before the last v_add. |
| ; GCN-NEXT: s_set_vgpr_msb 0x401 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0, v0 /*v256*/ |
| ; GCN-NEXT: v_add_nc_u32_e32 v1, v1 /*v257*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x105 |
| ; GCN-NEXT: v_add_nc_u32_e32 v2, v2 /*v258*/, v2 /*v258*/ |
| $vgpr0 = V_MOV_B32_e32 undef $vgpr256, implicit $exec |
| $vgpr1 = V_ADD_U32_e32 undef $vgpr257, undef $vgpr1, implicit $exec |
| $vgpr2 = V_ADD_U32_e32 undef $vgpr258, undef $vgpr258, implicit $exec |
| |
| ; ASM: NumVgprs: 263 |
| |
| ... |
| |
| # terminators: checks where the mode is reset to zero relative to block ends, |
| # branches, calls, returns and s_endpgm. Note that each explanatory comment in |
| # the body is placed AFTER the basic block it describes. |
| # ASM-LABEL: {{^}}terminators: |
| # DIS-LABEL: <terminators>: |
| --- |
| name: terminators |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| ; GCN-NEXT: s_nop 0 |
| ; GCN-NEXT: s_branch |
| S_NOP 0 |
| S_BRANCH %bb.1 |
| |
| ; No mode switch if it was zero |
| |
| bb.1: |
| ; ASM: .LBB{{.*_1}}: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| |
| ; Reset on fallthrough block end |
| |
| bb.2: |
| ; ASM-NEXT: %bb.2: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: s_branch |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| S_BRANCH %bb.3 |
| |
| ; Reset mode on terminator |
| |
| bb.3: |
| ; ASM: .LBB{{.*_3}}: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: s_swap_pc_i64 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $exec = S_SWAPPC_B64 undef $sgpr0_sgpr1 |
| |
| ; Reset mode before a call |
| |
| bb.4: |
| ; ASM-NEXT: %bb.4: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_endpgm |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| S_ENDPGM 0 |
| |
| ; No mode reset before S_ENDPGM |
| |
| bb.5: |
| ; ASM-NEXT: %bb.5: |
| ; GCN-NEXT: v_mov_b32_e32 v0, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: s_set_pc_i64 |
| $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| S_SETPC_B64 undef $sgpr0_sgpr1, implicit-def $exec |
| |
| ; Assume mode zero at block begin even if we did not reset it before |
| ; Reset mode before branch |
| |
| bb.6: |
| ; ASM-NEXT: %bb.6: |
| ; GCN-NEXT: s_set_pc_i64 |
| S_SETPC_B64 undef $sgpr0_sgpr1, implicit-def $exec |
| |
| ; But do not reset mode before a branch if it was zero |
| |
| bb.7: |
| ; ASM-NEXT: %bb.7: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; ASM-NEXT: ; return to shader part epilog |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| SI_RETURN_TO_EPILOG undef $vgpr0, implicit-def $exec |
| |
| ; Reset mode before returning from a call |
| |
| bb.8: |
| ; ASM-NEXT: %bb.8: |
| ; ASM-NEXT: ; return to shader part epilog |
| SI_RETURN_TO_EPILOG undef $vgpr0, implicit-def $exec |
| |
| ; But do not reset mode before a call return if it was zero |
| |
| bb.9: |
| ; ASM-NEXT: %bb.9: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: s_set_pc_i64 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| S_SETPC_B64_return undef $sgpr0_sgpr1, implicit-def $exec |
| |
| ; ASM: NumVgprs: 257 |
| ... |
| |
| # control_flow: a fallthrough block, a self-looping block and a final block. |
| # The expected output resets the mode to zero at the end of bb.0 and before |
| # the conditional branch in bb.1, sets it again at the start of each block |
| # that needs it, and emits no reset before s_endpgm in bb.2. |
| # ASM-LABEL: {{^}}control_flow: |
| # DIS-LABEL: <control_flow>: |
| --- |
| name: control_flow |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr0, implicit $exec |
| |
| bb.1: |
| ; ASM: .LBB{{[0-9]+}}_1: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: s_cbranch_scc0 |
| $vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| S_CBRANCH_SCC0 %bb.1, undef implicit $scc |
| |
| bb.2: |
| ; ASM: %bb.2: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v2 /*v258*/, v2 |
| ; GCN-NEXT: s_endpgm |
| $vgpr258 = V_MOV_B32_e32 undef $vgpr2, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| # inline_asm: interaction with INLINEASM. The expected output resets the mode |
| # to zero before the first inline asm that has VGPR operands, emits no mode |
| # change between the three VGPR-using asm statements, and keeps the non-zero |
| # mode across the inline asm whose only operand is an SGPR. |
| # ASM-LABEL: {{^}}inline_asm: |
| # DIS-LABEL: <inline_asm>: |
| --- |
| name: inline_asm |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; ASM: def v0 |
| ; GCN-NOT: s_set_vgpr_msb |
| ; ASM: use v0 |
| ; GCN-NOT: s_set_vgpr_msb |
| ; ASM: use v1 |
| ; GCN: s_set_vgpr_msb 64 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NOT: s_set_vgpr_msb |
| ; ASM: no vgprs, mode preserved |
| ; GCN-NOT: s_set_vgpr_msb |
| ; GCN: v_mov_b32_e32 v0 /*v256*/, v1 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| INLINEASM &"; def v0", 1, 327690, def $vgpr0 |
| INLINEASM &"; use v0", 1, 327690, $vgpr0 |
| INLINEASM &"; use v1", 1, 327690, undef $vgpr1 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| INLINEASM &"; no vgprs, mode preserved", 1, 327690, undef $sgpr0 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| |
| ; ASM: NumVgprs: 257 |
| ... |
| |
| # bundle: instructions inside BUNDLEs are handled as well. In the second |
| # bundle the expected s_set_vgpr_msb appears after the bundled s_nop, directly |
| # before the v_mov that reads the high VGPR. |
| # ASM-LABEL: {{^}}bundle: |
| # DIS-LABEL: <bundle>: |
| --- |
| name: bundle |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_nop 0 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4001 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, v0 /*v256*/ |
| BUNDLE implicit-def $vgpr256 { |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| } |
| BUNDLE implicit $vgpr256 { |
| S_NOP 0 |
| $vgpr1 = V_MOV_B32_e32 $vgpr256, implicit $exec |
| } |
| |
| ; ASM: NumVgprs: 257 |
| ... |
| |
| # ASM-LABEL: {{^}}hard_clauses: |
| # DIS-LABEL: <hard_clauses>: |
| --- |
| name: hard_clauses |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; s_set_vgpr_msb cannot be a first instruction in a clause and must be placed before it. |
| ; The BUNDLE header lists the registers defined by the bundled instructions |
| ; ($vgpr256, $vgpr257 and $vgpr258). |
| |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: s_clause 0x2 |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1 |
| ; GCN-NEXT: v_mov_b32_e32 v2 /*v258*/, v1 |
| BUNDLE implicit-def $vgpr256, implicit-def $vgpr257, implicit-def $vgpr258, implicit undef $vgpr1 { |
| S_CLAUSE 2 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr258 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| } |
| |
| ; S_CLAUSE 515 means 4 instructions broken in groups of 2. |
| ; A mode change cannot be a first instruction of each group. |
| ; If we cannot insert a mode change right before the clause just drop it. |
| |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, v1 |
| ; GCN-NEXT: v_mov_b32_e32 v3, v1 |
| BUNDLE implicit-def $vgpr256, implicit-def $vgpr257, implicit-def $vgpr2, implicit-def $vgpr3, implicit undef $vgpr1 { |
| S_CLAUSE 515 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr2 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr3 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| } |
| |
| ; Check that we properly update the clause length. |
| |
| ; GCN-NEXT: s_clause 0x3 |
| ; GCN-NEXT: v_mov_b32_e32 v0, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 64 |
| ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1 |
| ; GCN-NEXT: v_mov_b32_e32 v2 /*v258*/, v1 |
| BUNDLE implicit-def $vgpr0, implicit-def $vgpr257, implicit-def $vgpr248, implicit undef $vgpr1 { |
| S_CLAUSE 2 |
| $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr258 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| } |
| |
| ; Check that we do not exceed the limit of 63 instructions or simm16 value of 62. |
| |
| ; GCN-NEXT: s_clause 0x3e |
| ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 |
| ; GCN-NEXT: s_set_vgpr_msb 0x4000 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, v1 |
| ; GCN-NEXT: v_mov_b32_e32 v2, v1 |
| ; GCN-COUNT-60: v_mov_b32_e32 v1, v1 |
| BUNDLE implicit-def $vgpr256, implicit-def $vgpr1, implicit-def $vgpr2, implicit undef $vgpr1 { |
| S_CLAUSE 62 |
| $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr2 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec |
| } |
| |
| ; ASM: NumVgprs: 259 |
| ... |
| |
| # A pseudo instruction with a VGPR operand (SI_ILLEGAL_COPY) goes through |
| # the lowering. The only requirement is that llc does not assert and that |
| # the illegal-copy marker still appears in the assembly output. |
| # ASM-LABEL: {{^}}pseudo: |
| # DIS-LABEL: <pseudo>: |
| --- |
| name: pseudo |
| body: | |
| bb.0: |
| liveins: $vgpr0 |
| |
| $sgpr0 = SI_ILLEGAL_COPY killed $vgpr0, implicit-def $exec, implicit-def $vcc, implicit $exec |
| ; Just do not assert here. |
| ; ASM: illegal copy v0 to s0 |
| SI_RETURN_TO_EPILOG killed $sgpr0 |
| S_ENDPGM 0 |
| ... |
| |
| # LD_SCALE operands ignore the MSB mode and always use the low 256 VGPRs. |
| |
| # Covers V_WMMA_LD_SCALE* and the scaled WMMA instructions that consume the |
| # loaded scales. In the expected output the scale operands are always |
| # printed as plain low VGPRs, and no mode change is emitted for the |
| # LD_SCALE instructions themselves, whatever mode is currently active. |
| # ASM-LABEL: {{^}}ld_scale: |
| # DIS-LABEL: <ld_scale>: |
| --- |
| name: ld_scale |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; ASM: %bb.0: |
| |
| ; Start from a non-zero mode: both sources of the add are high VGPRs. |
| ; GCN: s_set_vgpr_msb 5 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/ |
| $vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec |
| |
| ; Do not change mode for LD_SCALE. |
| |
| ; GCN-NOT: s_set_vgpr_msb |
| ; GCN-NEXT: v_wmma_ld_scale_paired_b32 v1, v2 |
| V_WMMA_LD_SCALE_PAIRED_B32 undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, implicit $exec |
| |
| ; Both matrix sources are v[500:515]; the mode set above (src0=1 src1=1) |
| ; is expected to be reused, so no mode change is allowed here. The scale |
| ; operands v1 and v2 are printed without a high-register annotation. |
| ; GCN-NOT: s_set_vgpr_msb |
| ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v1, v2 |
| $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| |
| ; All matrix operands are low VGPRs: the mode is reset to zero. |
| ; GCN-NEXT: s_set_vgpr_msb 0x500 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v1, v2 |
| $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| |
| ; Only the first matrix source is high: src0=1. |
| ; GCN-NEXT: s_set_vgpr_msb 1 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[0:15], v[10:17], v1, v2 |
| $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| |
| ; Same sequence again for the LD_SCALE16 / SCALE16 variants, whose scale |
| ; operands are 64-bit register pairs. |
| ; GCN-NOT: s_set_vgpr_msb |
| ; GCN-NEXT: v_wmma_ld_scale16_paired_b64 v[0:1], v[2:3] |
| V_WMMA_LD_SCALE16_PAIRED_B64 undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x105 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=1 src2=0 |
| ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v[0:1], v[2:3] |
| $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 0x500 |
| ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[100:115], v[100:115], v[10:17], v[0:1], v[2:3] |
| $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, undef $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| |
| ; GCN-NEXT: s_set_vgpr_msb 1 |
| ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 |
| ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[0:15], v[10:17], v[0:1], v[2:3] |
| $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| ... |