blob: c423e7b3adeab4cc6c7cab6c7d4aa1dd4208edc6 [file] [log] [blame]
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=greedy,1 -verify-machineinstrs < %s | FileCheck -check-prefix=REGALLOC-GFX908 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=prologepilog -verify-machineinstrs < %s | FileCheck -check-prefix=PEI-GFX908 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=greedy,1 -verify-machineinstrs < %s | FileCheck -check-prefix=REGALLOC-GFX90A %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=prologepilog -verify-machineinstrs < %s | FileCheck -check-prefix=PEI-GFX90A %s
; Partial reg copy and spill missed during regalloc handled later at frame lowering.
define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-LABEL: name: partial_copy
; REGALLOC-GFX908: bb.0 (%ir-block.0):
; REGALLOC-GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def [[VREG_64:%[0-9]+]]
; REGALLOC-GFX908: SI_SPILL_V64_SAVE [[VREG_64]], %stack.0
; REGALLOC-GFX908: [[V_MFMA_I32_4X4X4I8_A128:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64
; REGALLOC-GFX908: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0
; REGALLOC-GFX908: GLOBAL_STORE_DWORDX2 undef %{{[0-9]+}}:vreg_64, [[SI_SPILL_V64_RESTORE]]
; REGALLOC-GFX908: [[COPY_A128_TO_V128:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_A128]]
; REGALLOC-GFX908: GLOBAL_STORE_DWORDX4 undef %{{[0-9]+}}:vreg_64, [[COPY_A128_TO_V128]]
;
; PEI-GFX908-LABEL: name: partial_copy
; PEI-GFX908: bb.0 (%ir-block.0):
; PEI-GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
; PEI-GFX908: BUFFER_STORE_DWORD_OFFSET killed $vgpr0
; PEI-GFX908: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1
; PEI-GFX908: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64
; PEI-GFX908: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET
; PEI-GFX908: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4
; PEI-GFX908: GLOBAL_STORE_DWORDX2 undef renamable ${{.*}}, killed renamable $vgpr0_vgpr1
; PEI-GFX908: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
; PEI-GFX908: GLOBAL_STORE_DWORDX4 undef renamable ${{.*}}, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
;
; REGALLOC-GFX90A-LABEL: name: partial_copy
; REGALLOC-GFX90A: bb.0 (%ir-block.0):
; REGALLOC-GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def [[VREG_64:%[0-9]+]]
; REGALLOC-GFX90A: SI_SPILL_V64_SAVE [[VREG_64]], %stack.0
; REGALLOC-GFX90A: [[V_MFMA_I32_4X4X4I8_A128:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64
; REGALLOC-GFX90A: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0
; REGALLOC-GFX90A: GLOBAL_STORE_DWORDX2 undef %{{[0-9]+}}:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]]
; REGALLOC-GFX90A: GLOBAL_STORE_DWORDX4 undef %{{[0-9]+}}:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_A128]]
;
; PEI-GFX90A-LABEL: name: partial_copy
; PEI-GFX90A: bb.0 (%ir-block.0):
; PEI-GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
; PEI-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0
; PEI-GFX90A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1
; PEI-GFX90A: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64
; PEI-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET
; PEI-GFX90A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4
; PEI-GFX90A: GLOBAL_STORE_DWORDX2 undef renamable ${{.*}}, killed renamable $vgpr0_vgpr1
; PEI-GFX90A: GLOBAL_STORE_DWORDX4 undef renamable ${{.*}}, killed renamable $agpr0_agpr1_agpr2_agpr3
call void asm sideeffect "; use $0", "a" (i32 undef)
%v0 = call <4 x i32> asm sideeffect "; def $0", "=v" ()
%v1 = call <2 x i32> asm sideeffect "; def $0", "=v" ()
%mai = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
store volatile <4 x i32> %v0, <4 x i32> addrspace(1)* undef
store volatile <2 x i32> %v1, <2 x i32> addrspace(1)* undef
store volatile <4 x i32> %mai, <4 x i32> addrspace(1)* undef
ret void
}
declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
attributes #0 = { nounwind "amdgpu-num-vgpr"="5" }