blob: 116f46df010491dc07aca3e62790d2d7057f3fdf [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
; SIFixSGPRCopies will insert a readfirstlane from an AV source
; register, which needs to be constrained to a VGPR class to satisfy
; the operand constraint.
; Kernel shape: a volatile i16 load is zero-extended and fed into the back-edge
; of a phi in an unconditional self-loop. The phi value is multiplied by the
; uniform kernel argument %arg and used to form the address of a second
; volatile load, so the loaded value ends up in the scalar address computation
; (see the v_readfirstlane_b32 in the expected assembly).
define amdgpu_kernel void @constrain_readfirstlane_av(i64 %arg, ptr addrspace(1) %ptr) {
; CHECK-LABEL: constrain_readfirstlane_av:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_mov_b64 s[6:7], 0
; CHECK-NEXT: s_and_b64 vcc, exec, -1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_load_ushort v1, v0, s[2:3] glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_readfirstlane_b32 s4, v1
; CHECK-NEXT: s_and_b32 s4, s4, 0xffff
; CHECK-NEXT: .LBB0_1: ; %bb16
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_mul_i32 s8, s6, s1
; CHECK-NEXT: s_mul_hi_u32 s9, s6, s0
; CHECK-NEXT: s_mul_i32 s7, s7, s0
; CHECK-NEXT: s_add_i32 s8, s9, s8
; CHECK-NEXT: s_mul_i32 s6, s6, s0
; CHECK-NEXT: s_add_i32 s7, s8, s7
; CHECK-NEXT: s_lshl_b64 s[6:7], s[6:7], 5
; CHECK-NEXT: s_add_u32 s6, s2, s6
; CHECK-NEXT: s_addc_u32 s7, s3, s7
; CHECK-NEXT: global_load_dword v1, v0, s[6:7] glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5]
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccnz .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %DummyReturnBlock
; CHECK-NEXT: s_endpgm
; Entry block: volatile 16-bit load from %ptr, zero-extended to i64.
bb:
%i = load volatile i16, ptr addrspace(1) %ptr, align 2
%i6 = zext i16 %i to i64
br label %bb16
; Self-loop with no exit edge: %i17 is 0 when entered from %bb and %i6 on
; every later iteration.
bb16: ; preds = %bb16, %bb
%i17 = phi i64 [ %i6, %bb16 ], [ 0, %bb ]
%i23 = mul i64 %i17, %arg
; The GEP indexes in units of [16 x half], i.e. 32 bytes per element; this is
; the shift-left by 5 in the expected assembly.
%i25.split = getelementptr [16 x half], ptr addrspace(1) %ptr, i64 %i23
; The loaded value is unused; the load is volatile, and the expected assembly
; still contains the corresponding global_load_dword.
%i27 = load volatile <2 x half>, ptr addrspace(1) %i25.split, align 16
br label %bb16
}
; Loads an i32 from %ptr and hands it to inline asm through the "{m0}"
; physical-register constraint. The expected assembly moves the loaded VGPR
; value into an SGPR with v_readfirstlane_b32 and then copies that SGPR to m0.
; NOTE(review): going by the function name, the loaded value presumably lives
; in an AV-class virtual register before the copy to m0 - confirm against the
; MIR if this matters.
define void @av_class_to_m0(ptr addrspace(1) %ptr) {
; CHECK-LABEL: av_class_to_m0:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dword v0, v[0:1], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_readfirstlane_b32 s4, v0
; CHECK-NEXT: s_mov_b32 m0, s4
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use m0
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_setpc_b64 s[30:31]
%load = load i32, ptr addrspace(1) %ptr
; "{m0}" ties the asm input operand to the m0 register; the asm string itself
; only emits a comment.
call void asm sideeffect "; use $0", "{m0}"(i32 %load)
ret void
}