| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s |
| |
| ; SIFixSGPRCopies will insert a readfirstlane from an AV source |
| ; register, which needs to be constrained to a VGPR class to satisfy |
| ; the operand constraint. |
| |
| ; A volatile i16 load in the entry block is zero-extended and fed into the |
| ; phi of an unconditional self-loop. The expected output copies the loaded |
| ; value from v1 into s4 with v_readfirstlane_b32 before entering the loop. |
| define amdgpu_kernel void @constrain_readfirstlane_av(i64 %arg, ptr addrspace(1) %ptr) { |
| ; CHECK-LABEL: constrain_readfirstlane_av: |
| ; CHECK: ; %bb.0: ; %bb |
| ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: s_mov_b32 s5, 0 |
| ; CHECK-NEXT: s_mov_b64 s[6:7], 0 |
| ; CHECK-NEXT: s_and_b64 vcc, exec, -1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_load_ushort v1, v0, s[2:3] glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_readfirstlane_b32 s4, v1 |
| ; CHECK-NEXT: s_and_b32 s4, s4, 0xffff |
| ; CHECK-NEXT: .LBB0_1: ; %bb16 |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: s_mul_i32 s8, s6, s1 |
| ; CHECK-NEXT: s_mul_hi_u32 s9, s6, s0 |
| ; CHECK-NEXT: s_mul_i32 s7, s7, s0 |
| ; CHECK-NEXT: s_add_i32 s8, s9, s8 |
| ; CHECK-NEXT: s_mul_i32 s6, s6, s0 |
| ; CHECK-NEXT: s_add_i32 s7, s8, s7 |
| ; CHECK-NEXT: s_lshl_b64 s[6:7], s[6:7], 5 |
| ; CHECK-NEXT: s_add_u32 s6, s2, s6 |
| ; CHECK-NEXT: s_addc_u32 s7, s3, s7 |
| ; CHECK-NEXT: global_load_dword v1, v0, s[6:7] glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] |
| ; CHECK-NEXT: s_mov_b64 vcc, vcc |
| ; CHECK-NEXT: s_cbranch_vccnz .LBB0_1 |
| ; CHECK-NEXT: ; %bb.2: ; %DummyReturnBlock |
| ; CHECK-NEXT: s_endpgm |
| bb: |
| ; %i6 (the zero-extended volatile load) is used only by the phi in %bb16. |
| %i = load volatile i16, ptr addrspace(1) %ptr, align 2 |
| %i6 = zext i16 %i to i64 |
| br label %bb16 |
| |
| ; %bb16 branches only to itself, so the IR loop has no exit. The phi is 0 |
| ; when entered from %bb and %i6 on every back edge. |
| bb16: ; preds = %bb16, %bb |
| %i17 = phi i64 [ %i6, %bb16 ], [ 0, %bb ] |
| ; Index %ptr in units of [16 x half] by %i17 * %arg and issue a volatile |
| ; load from that address; the loaded value %i27 has no users. |
| %i23 = mul i64 %i17, %arg |
| %i25.split = getelementptr [16 x half], ptr addrspace(1) %ptr, i64 %i23 |
| %i27 = load volatile <2 x half>, ptr addrspace(1) %i25.split, align 16 |
| br label %bb16 |
| } |
| |
| ; Loads an i32 through %ptr and passes it to inline asm through the "{m0}" |
| ; constraint. The expected output moves the loaded value from v0 into s4 |
| ; with v_readfirstlane_b32 and then copies s4 into m0. |
| define void @av_class_to_m0(ptr addrspace(1) %ptr) { |
| ; CHECK-LABEL: av_class_to_m0: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dword v0, v[0:1], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_readfirstlane_b32 s4, v0 |
| ; CHECK-NEXT: s_mov_b32 m0, s4 |
| ; CHECK-NEXT: ;;#ASMSTART |
| ; CHECK-NEXT: ; use m0 |
| ; CHECK-NEXT: ;;#ASMEND |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| %load = load i32, ptr addrspace(1) %ptr |
| ; The "{m0}" constraint pins the asm operand to the m0 register; the asm |
| ; body is only a comment that prints the operand. |
| call void asm sideeffect "; use $0", "{m0}"(i32 %load) |
| ret void |
| } |