# blob: 0a6f2fe9375d5ae9d7f3bc9aa6c61668dfd28736 [file] [edit]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=machine-scheduler -verify-misched %s -o - | FileCheck -check-prefix=DEFAULT %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=machine-scheduler -amdgpu-sched-strategy=coexec -verify-misched %s -o - | FileCheck -check-prefix=COEXEC %s
--- |
; Embedded LLVM IR module: one empty stub per MIR function defined later in
; this file. Both stubs use attribute group #0, which sets
; "amdgpu-waves-per-eu" to "1,1".
define void @test-sched-effective-stall() #0 { ret void }
define void @test-sched-pending-structural-stall() #0 { ret void }
attributes #0 = { "amdgpu-waves-per-eu"="1,1" }
...
---
# Input order: a GLOBAL_LOAD_DWORDX2, a V_PK_ADD_F32 that consumes the load
# result, then two V_WMMA_SCALE instructions that share no operands with each
# other or with the load/add chain.
#
# Expected order under the default strategy (DEFAULT prefix):
#   first WMMA, load, V_PK_ADD_F32, second WMMA.
# Expected order under -amdgpu-sched-strategy=coexec (COEXEC prefix):
#   first WMMA, load, second WMMA, V_PK_ADD_F32.
# The only difference is whether the add that depends on the load is placed
# before or after the second WMMA.
#
# NOTE(review): the test name suggests this targets stall accounting in the
# coexec strategy; confirm against the scheduler change that added it.
name: test-sched-effective-stall
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: test-sched-effective-stall
; DEFAULT: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF5:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: early-clobber %13:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[DEF3]], [[DEF4]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; DEFAULT-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF5]], 0, 0, implicit $exec
; DEFAULT-NEXT: [[DEF6:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF7:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF8:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF9:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF10:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = V_PK_ADD_F32 8, [[GLOBAL_LOAD_DWORDX2_]], 8, [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; DEFAULT-NEXT: early-clobber %14:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF6]], [[DEF7]], 0, [[DEF8]], [[DEF9]], [[DEF10]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; DEFAULT-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F32_]], implicit %13, implicit %14
;
; COEXEC-LABEL: name: test-sched-effective-stall
; COEXEC: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: early-clobber %13:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[DEF3]], [[DEF4]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; COEXEC-NEXT: [[DEF5:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF6:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF7:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF8:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF9:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF10]], 0, 0, implicit $exec
; COEXEC-NEXT: early-clobber %14:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF5]], [[DEF6]], 0, [[DEF7]], [[DEF8]], [[DEF9]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; COEXEC-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = V_PK_ADD_F32 8, [[GLOBAL_LOAD_DWORDX2_]], 8, [[GLOBAL_LOAD_DWORDX2_]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; COEXEC-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F32_]], implicit %13, implicit %14
%0:vreg_512_align2 = IMPLICIT_DEF
%1:vreg_512_align2 = IMPLICIT_DEF
%2:vreg_256_align2 = IMPLICIT_DEF
%3:vgpr_32_lo256 = IMPLICIT_DEF
%4:vgpr_32_lo256 = IMPLICIT_DEF
%5:vreg_512_align2 = IMPLICIT_DEF
%6:vreg_512_align2 = IMPLICIT_DEF
%7:vreg_256_align2 = IMPLICIT_DEF
%8:vgpr_32_lo256 = IMPLICIT_DEF
%9:vgpr_32_lo256 = IMPLICIT_DEF
%10:vreg_64_align2 = IMPLICIT_DEF
%11:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %10, 0, 0, implicit $exec
%12:vreg_64_align2 = V_PK_ADD_F32 8, %11, 8, %11, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%13:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %0, %1, 0, %2, %3, %4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%14:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %5, %6, 0, %7, %8, %9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0, implicit %12, implicit %13, implicit %14
...
---
# Input order: two V_WMMA_SCALE instructions with disjoint operands, followed
# by two S_NOP 0 instructions.
#
# Expected order under the default strategy (DEFAULT prefix):
#   first WMMA, the second WMMA's operand definitions, both S_NOPs, second WMMA.
# Expected order under -amdgpu-sched-strategy=coexec (COEXEC prefix):
#   both S_NOPs first, then the two WMMAs separated only by the second WMMA's
#   operand definitions.
#
# NOTE(review): the test name suggests this targets how the coexec strategy
# treats a pending instruction blocked by a structural hazard; confirm against
# the scheduler change that added it.
name: test-sched-pending-structural-stall
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: test-sched-pending-structural-stall
; DEFAULT: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: early-clobber %10:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[DEF3]], [[DEF4]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; DEFAULT-NEXT: [[DEF5:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF6:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF7:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF8:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF9:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; DEFAULT-NEXT: S_NOP 0
; DEFAULT-NEXT: S_NOP 0
; DEFAULT-NEXT: early-clobber %11:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF5]], [[DEF6]], 0, [[DEF7]], [[DEF8]], [[DEF9]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; DEFAULT-NEXT: S_ENDPGM 0, implicit %10, implicit %11
;
; COEXEC-LABEL: name: test-sched-pending-structural-stall
; COEXEC: S_NOP 0
; COEXEC-NEXT: S_NOP 0
; COEXEC-NEXT: [[DEF:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF1:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: early-clobber %10:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF]], [[DEF1]], 0, [[DEF2]], [[DEF3]], [[DEF4]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; COEXEC-NEXT: [[DEF5:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF6:%[0-9]+]]:vreg_512_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF7:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF8:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: [[DEF9:%[0-9]+]]:vgpr_32_lo256 = IMPLICIT_DEF
; COEXEC-NEXT: early-clobber %11:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr [[DEF5]], [[DEF6]], 0, [[DEF7]], [[DEF8]], [[DEF9]], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; COEXEC-NEXT: S_ENDPGM 0, implicit %10, implicit %11
%0:vreg_512_align2 = IMPLICIT_DEF
%1:vreg_512_align2 = IMPLICIT_DEF
%2:vreg_256_align2 = IMPLICIT_DEF
%3:vgpr_32_lo256 = IMPLICIT_DEF
%4:vgpr_32_lo256 = IMPLICIT_DEF
%5:vreg_512_align2 = IMPLICIT_DEF
%6:vreg_512_align2 = IMPLICIT_DEF
%7:vreg_256_align2 = IMPLICIT_DEF
%8:vgpr_32_lo256 = IMPLICIT_DEF
%9:vgpr_32_lo256 = IMPLICIT_DEF
%10:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %0, %1, 0, %2, %3, %4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%11:vreg_256_align2 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr %5, %6, 0, %7, %8, %9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
S_NOP 0
S_NOP 0
S_ENDPGM 0, implicit %10, implicit %11
...