| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-s-branch-bits=4 -run-pass branch-relaxation %s -o - | FileCheck %s |
| |
| # Test that getInstSizeInBytes correctly estimates S_MOV_B64 with 64-bit |
| # literal values on targets with 64-bit literal support (gfx1250). |
| # |
| # Non-inline values outside [0, 2^31-1] require 64-bit literal encoding, |
| # making the instruction 12 bytes (4-byte opcode + 8-byte literal) instead |
| # of 8 bytes (4-byte opcode + 4-byte literal). |
| # |
| # With -amdgpu-s-branch-bits=4, forward branches can reach at most +7 dwords. |
| # Three S_MOV_B64 with 64-bit literals = 3 * 12 = 36 bytes = 9 dwords, |
| # which exceeds the 7-dword limit, so the branch must be relaxed. |
| # |
| # Without the correct size estimation (8 bytes instead of 12), the total |
| # would be 3 * 8 = 24 bytes = 6 dwords, which fits within the limit, so |
| # relaxation would not occur. In a rare real-world scenario, this could lead |
| # to an assembler error because the branch offset does not fit in simm16. |
| |
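| # The branch is relaxed: the original S_CBRANCH_SCC0 %bb.2 is inverted to |
| # S_CBRANCH_SCC1 %bb.1 (skipping the long branch), and a new block (bb.3) is |
| # inserted that performs the long branch to bb.2. The CHECK lines below |
| # expect the S_GETPC_B64 / S_ADD_U32 / S_ADDC_U32 / S_SETPC_B64 sequence there. |
| # NOTE(review): this comment previously named S_ADD_PC_I64 as the long-branch |
| # instruction; confirm which form llc emits for gfx1250 and regenerate the |
| # CHECK lines with utils/update_mir_test_checks.py if they are stale. |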
| |
| --- |
| name: s_mov_b64_64bit_literal_size |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| body: | |
| ; CHECK-LABEL: name: s_mov_b64_64bit_literal_size |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr8 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc |
| ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $sgpr4_sgpr5 = S_GETPC_B64 post-instr-symbol <mcsymbol > |
| ; CHECK-NEXT: $sgpr4 = S_ADD_U32 $sgpr4, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc |
| ; CHECK-NEXT: $sgpr5 = S_ADDC_U32 $sgpr5, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc |
| ; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $sgpr10_sgpr11 = S_MOV_B64 4294967295 |
| ; CHECK-NEXT: $sgpr12_sgpr13 = S_MOV_B64 2147483648 |
| ; CHECK-NEXT: $sgpr14_sgpr15 = S_MOV_B64 -17 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: S_ENDPGM 0 |
| ; Input MIR starts here; the CHECK lines above are the expected output of |
| ; the branch-relaxation pass. |
| ; bb.0 branches forward over bb.1 to bb.2 when SCC is 0, so the encoded size |
| ; of bb.1 is the distance this branch has to cover. |
| bb.0: |
| liveins: $sgpr8 |
| S_CMP_EQ_U32 $sgpr8, 0, implicit-def $scc |
| S_CBRANCH_SCC0 %bb.2, implicit $scc |
| |
| bb.1: |
| ; S_MOV_B64 with values requiring 64-bit literal encoding (12 bytes each). |
| ; These values are outside the [0, 2^31-1] range where 32-bit literal |
| ; can be used, so they need 64-bit literal encoding on gfx1250. |
| ; 0xFFFFFFFF (4294967295) is in [2^31, 2^32-1]. |
| ; 0x80000000 (2147483648) is exactly 2^31. |
| ; -17 (0xFFFFFFFFFFFFFFEF) is a negative non-inline constant. |
| ; Together: 3 * 12 bytes = 36 bytes = 9 dwords, more than the +7 dwords a |
| ; forward branch can reach with -amdgpu-s-branch-bits=4. |
| ; This block has no terminator and falls through to bb.2. |
| $sgpr10_sgpr11 = S_MOV_B64 4294967295 |
| $sgpr12_sgpr13 = S_MOV_B64 2147483648 |
| $sgpr14_sgpr15 = S_MOV_B64 -17 |
| |
| ; Target of the conditional branch in bb.0. |
| bb.2: |
| S_ENDPGM 0 |
| ... |