llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx1250.mir - llvm-project - Git at Google

 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-s-branch-bits=4 -run-pass branch-relaxation %s -o - | FileCheck %s

 # Test that getInstSizeInBytes correctly estimates S_MOV_B64 with 64-bit
 # literal values on targets with 64-bit literal support (gfx1250).
 #
 # Values outside [0, 2^31-1] require 64-bit literal encoding, making the
 # instruction 12 bytes (4-byte opcode + 8-byte literal) instead of 8 bytes
 # (4-byte opcode + 4-byte literal).
 #
 # With -amdgpu-s-branch-bits=4, forward branches can reach at most +7 dwords.
 # Three S_MOV_B64 with 64-bit literals = 3 * 12 = 36 bytes = 9 dwords,
 # which exceeds the 7-dword limit, so the branch must be relaxed.
 #
 # Without the correct size estimation (8 bytes instead of 12), the total
 # would be 3 * 8 = 24 bytes = 6 dwords, fitting within the limit, and
 # relaxation would not occur. In a rare real-world scenario, this could lead to
 # an assembler error because the branch offset no longer fits in simm16.

 # The branch is relaxed: the original S_CBRANCH_SCC0 is inverted to
 # S_CBRANCH_SCC1 (skipping the long branch), and a new block (bb.3) is
 # inserted for the long branch. Per the assertions below, bb.3 holds the
 # S_GETPC_B64 / S_ADD_U32 / S_ADDC_U32 / S_SETPC_B64 sequence.

 ---
 # Single MIR function: bb.0 conditionally branches over bb.1 to bb.2, and
 # bb.1 is filled with three S_MOV_B64 whose literals are listed in its
 # comments. The assertions at the top of the body record the expected
 # output of the branch-relaxation pass.
 name: s_mov_b64_64bit_literal_size
 tracksRegLiveness: true
 machineFunctionInfo:
   scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
   stackPtrOffsetReg: '$sgpr32'
 body: |
   ; CHECK-LABEL: name: s_mov_b64_64bit_literal_size
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $sgpr8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4_sgpr5 = S_GETPC_B64 post-instr-symbol <mcsymbol >
   ; CHECK-NEXT:   $sgpr4 = S_ADD_U32 $sgpr4, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
   ; CHECK-NEXT:   $sgpr5 = S_ADDC_U32 $sgpr5, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
   ; CHECK-NEXT:   S_SETPC_B64 $sgpr4_sgpr5
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr10_sgpr11 = S_MOV_B64 4294967295
   ; CHECK-NEXT:   $sgpr12_sgpr13 = S_MOV_B64 2147483648
   ; CHECK-NEXT:   $sgpr14_sgpr15 = S_MOV_B64 -17
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr8
     S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
     ; Branch under test: its target bb.2 lies past the three S_MOV_B64 in
     ; bb.1, so the estimated size of bb.1 determines whether it is in range.
     S_CBRANCH_SCC0 %bb.2, implicit $scc

   bb.1:
     ; S_MOV_B64 with values requiring 64-bit literal encoding (12 bytes each).
     ; These values are outside the [0, 2^31-1] range where 32-bit literal
     ; can be used, so they need 64-bit literal encoding on gfx1250.
     ; 0xFFFFFFFF (4294967295) is in [2^31, 2^32-1].
     ; 0x80000000 (2147483648) is exactly 2^31.
     ; -17 (0xFFFFFFFFFFFFFFEF) is a negative non-inline constant.
     $sgpr10_sgpr11 = S_MOV_B64 4294967295
     $sgpr12_sgpr13 = S_MOV_B64 2147483648
     $sgpr14_sgpr15 = S_MOV_B64 -17

   bb.2:
     ; Destination of the conditional branch in bb.0.
     S_ENDPGM 0
 ...
	# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
	# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-s-branch-bits=4 -run-pass branch-relaxation %s -o - | FileCheck %s

	# Test that getInstSizeInBytes correctly estimates S_MOV_B64 with 64-bit
	# literal values on targets with 64-bit literal support (gfx1250).
	#
	# Values outside [0, 2^31-1] require 64-bit literal encoding, making the
	# instruction 12 bytes (4-byte opcode + 8-byte literal) instead of 8 bytes
	# (4-byte opcode + 4-byte literal).
	#
	# With -amdgpu-s-branch-bits=4, forward branches can reach at most +7 dwords.
	# Three S_MOV_B64 with 64-bit literals = 3 * 12 = 36 bytes = 9 dwords,
	# which exceeds the 7-dword limit, so the branch must be relaxed.
	#
	# Without the correct size estimation (8 bytes instead of 12), the total
	# would be 3 * 8 = 24 bytes = 6 dwords, fitting within the limit, and
	# relaxation would not occur. In a rare real-world scenario, this could lead to
	# an assembler error because the branch offset no longer fits in simm16.

	# The branch is relaxed: the original S_CBRANCH_SCC0 is inverted to
	# S_CBRANCH_SCC1 (skipping the long branch), and a new block (bb.3) is
	# inserted for the long branch. Per the assertions below, bb.3 holds the
	# S_GETPC_B64 / S_ADD_U32 / S_ADDC_U32 / S_SETPC_B64 sequence.

	---
	# Single MIR function: bb.0 conditionally branches over bb.1 to bb.2, and
	# bb.1 is filled with three S_MOV_B64 whose literals are listed in its
	# comments. The assertions at the top of the body record the expected
	# output of the branch-relaxation pass.
	name: s_mov_b64_64bit_literal_size
	tracksRegLiveness: true
	machineFunctionInfo:
	  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
	  stackPtrOffsetReg: '$sgpr32'
	body: |
	  ; CHECK-LABEL: name: s_mov_b64_64bit_literal_size
	  ; CHECK: bb.0:
	  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
	  ; CHECK-NEXT:   liveins: $sgpr8
	  ; CHECK-NEXT: {{  $}}
	  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
	  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
	  ; CHECK-NEXT: {{  $}}
	  ; CHECK-NEXT: bb.3:
	  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
	  ; CHECK-NEXT: {{  $}}
	  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_GETPC_B64 post-instr-symbol <mcsymbol >
	  ; CHECK-NEXT:   $sgpr4 = S_ADD_U32 $sgpr4, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
	  ; CHECK-NEXT:   $sgpr5 = S_ADDC_U32 $sgpr5, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
	  ; CHECK-NEXT:   S_SETPC_B64 $sgpr4_sgpr5
	  ; CHECK-NEXT: {{  $}}
	  ; CHECK-NEXT: bb.1:
	  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
	  ; CHECK-NEXT: {{  $}}
	  ; CHECK-NEXT:   $sgpr10_sgpr11 = S_MOV_B64 4294967295
	  ; CHECK-NEXT:   $sgpr12_sgpr13 = S_MOV_B64 2147483648
	  ; CHECK-NEXT:   $sgpr14_sgpr15 = S_MOV_B64 -17
	  ; CHECK-NEXT: {{  $}}
	  ; CHECK-NEXT: bb.2:
	  ; CHECK-NEXT:   S_ENDPGM 0
	  bb.0:
	    liveins: $sgpr8
	    S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
	    ; Branch under test: its target bb.2 lies past the three S_MOV_B64 in
	    ; bb.1, so the estimated size of bb.1 determines whether it is in range.
	    S_CBRANCH_SCC0 %bb.2, implicit $scc

	  bb.1:
	    ; S_MOV_B64 with values requiring 64-bit literal encoding (12 bytes each).
	    ; These values are outside the [0, 2^31-1] range where 32-bit literal
	    ; can be used, so they need 64-bit literal encoding on gfx1250.
	    ; 0xFFFFFFFF (4294967295) is in [2^31, 2^32-1].
	    ; 0x80000000 (2147483648) is exactly 2^31.
	    ; -17 (0xFFFFFFFFFFFFFFEF) is a negative non-inline constant.
	    $sgpr10_sgpr11 = S_MOV_B64 4294967295
	    $sgpr12_sgpr13 = S_MOV_B64 2147483648
	    $sgpr14_sgpr15 = S_MOV_B64 -17

	  bb.2:
	    ; Destination of the conditional branch in bb.0.
	    S_ENDPGM 0
	...