blob: ebc7253cf20277a1806561934cf8b0dd0f36791f [file]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-s-branch-bits=4 -run-pass branch-relaxation %s -o - | FileCheck %s
# Test that getInstSizeInBytes correctly estimates the size of S_MOV_B64 with
# 64-bit literal values on targets with 64-bit literal support (gfx1250).
#
# Values outside [0, 2^31-1] require 64-bit literal encoding, making the
# instruction 12 bytes (4-byte opcode + 8-byte literal) instead of 8 bytes
# (4-byte opcode + 4-byte literal).
#
# With -amdgpu-s-branch-bits=4, forward branches can reach at most +7 dwords.
# Three S_MOV_B64 with 64-bit literals = 3 * 12 = 36 bytes = 9 dwords,
# which exceeds the 7-dword limit, so the branch must be relaxed.
#
# If the size were underestimated (8 bytes instead of 12), the total
# would be 3 * 8 = 24 bytes = 6 dwords, which fits within the limit, so
# relaxation would not occur. In a rare real-world scenario, such an
# underestimate could leave a branch unrelaxed whose actual offset does not
# fit in simm16, resulting in an assembler error.
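#
# Expected result: the original S_CBRANCH_SCC0 %bb.2 is inverted to
# S_CBRANCH_SCC1 %bb.1 (which skips the long branch), and a new block (bb.3)
# is inserted that holds the long branch to bb.2.
# NOTE(review): this comment originally said bb.3 uses S_ADD_PC_I64, but the
# autogenerated assertions below expect the S_GETPC_B64 / S_ADD_U32 /
# S_ADDC_U32 / S_SETPC_B64 sequence. Confirm which expansion llc emits for
# gfx1250 and regenerate the assertions with utils/update_mir_test_checks.py.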
---
# Function under test: bb.0 conditionally branches over bb.1 to bb.2, so the
# encoded size of the instructions in bb.1 decides whether that branch has to
# be relaxed by the branch-relaxation pass.
name: s_mov_b64_64bit_literal_size
tracksRegLiveness: true
machineFunctionInfo:
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; CHECK-LABEL: name: s_mov_b64_64bit_literal_size
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT: liveins: $sgpr8
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
  ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $sgpr4_sgpr5 = S_GETPC_B64 post-instr-symbol <mcsymbol >
  ; CHECK-NEXT: $sgpr4 = S_ADD_U32 $sgpr4, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
  ; CHECK-NEXT: $sgpr5 = S_ADDC_U32 $sgpr5, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
  ; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $sgpr10_sgpr11 = S_MOV_B64 4294967295
  ; CHECK-NEXT: $sgpr12_sgpr13 = S_MOV_B64 2147483648
  ; CHECK-NEXT: $sgpr14_sgpr15 = S_MOV_B64 -17
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: S_ENDPGM 0
  ; Input MIR starts here. The autogenerated assertions above describe the
  ; expected output of the branch-relaxation pass for this function.
  bb.0:
    liveins: $sgpr8
    ; Define SCC from $sgpr8, then conditionally branch over bb.1 to bb.2.
    ; This is the branch whose reach is limited by -amdgpu-s-branch-bits=4.
    S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc
    S_CBRANCH_SCC0 %bb.2, implicit $scc
  bb.1:
    ; Three S_MOV_B64 whose immediates need the 64-bit literal encoding on
    ; gfx1250 (12 bytes each, 36 bytes = 9 dwords in total), because each
    ; value lies outside the [0, 2^31-1] range for which a 32-bit literal
    ; can be used:
    ;   0xFFFFFFFF (4294967295) is in [2^31, 2^32-1].
    ;   0x80000000 (2147483648) is exactly 2^31.
    ;   -17 (0xFFFFFFFFFFFFFFEF) is a negative non-inline constant.
    $sgpr10_sgpr11 = S_MOV_B64 4294967295
    $sgpr12_sgpr13 = S_MOV_B64 2147483648
    $sgpr14_sgpr15 = S_MOV_B64 -17
  bb.2:
    ; Target of the conditional branch in bb.0 and successor of bb.1.
    S_ENDPGM 0
...