| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s |
| ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s |
| |
; readfirstlane on an i1 passed as an ordinary (non-inreg) argument; the result
; is stored through %out. Both selectors are expected to emit the same code:
; v_readfirstlane_b32 on v2, then s_and_b32 with 1 before the byte store.
| define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_i1: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_i1: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src) |
| store i1 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; Same as the plain i1 case, but %src is marked inreg. The expected output
; contains no v_readfirstlane_b32: the value is taken from s16 and only masked
; with s_and_b32 before the byte store.
| define void @test_readfirstlane_i1_inreg(ptr addrspace(1) %out, i1 inreg %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_i1_inreg: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: s_and_b32 s4, s16, 1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_i1_inreg: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: s_and_b32 s4, s16, 1 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src) |
| store i1 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of an i1 produced by `icmp ugt %src, 42`, used as the condition
; of a select between %src and %src1. The compare result is materialized as 0/1
; in v4 and read with v_readfirstlane_b32. The two selectors then rebuild the
; vcc condition differently: s_bitcmp1_b32 + s_cselect_b64 for SelectionDAG,
; s_and_b32 + v_cmp_ne_u32 for GlobalISel.
| define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_i1_select: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2 |
| ; CHECK-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v4 |
| ; CHECK-SDAG-NEXT: s_bitcmp1_b32 s4, 0 |
| ; CHECK-SDAG-NEXT: s_cselect_b64 vcc, -1, 0 |
| ; CHECK-SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc |
| ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_i1_select: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2 |
| ; CHECK-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v4 |
| ; CHECK-GISEL-NEXT: s_and_b32 s4, 1, s4 |
| ; CHECK-GISEL-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 |
| ; CHECK-GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc |
| ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp ugt i32 %src, 42 |
| %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp) |
| %sel = select i1 %readfirstlane, i32 %src, i32 %src1 |
| store i32 %sel, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of an i1 loaded from %in; the result is stored through %out.
; Expected code for both selectors: flat_load_ubyte, v_readfirstlane_b32,
; s_and_b32 with 1, then the byte store.
| define void @test_readfirstlane_i1_load(ptr addrspace(1) %out, ptr addrspace(1) %in) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_i1_load: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: flat_load_ubyte v2, v[2:3] |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_i1_load: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: flat_load_ubyte v2, v[2:3] |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %load = load i1, ptr addrspace(1) %in |
| %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %load) |
| store i1 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; Baseline i32 case: readfirstlane of an i32 argument, stored through %out.
; A single v_readfirstlane_b32 is expected, with identical output from both
; selectors.
| define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %src) |
| store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of an i64 argument, stored through %out. The expected output
; uses two v_readfirstlane_b32 instructions, one per 32-bit half (v2 and v3).
; The selectors differ only in which SGPR receives which half and in the order
; the halves are read.
| define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4 |
| ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5 |
| ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %src) |
| store i64 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of a <2 x i64> argument. The result is consumed by an inline
; asm "s" operand rather than stored, so %out is unused. Four
; v_readfirstlane_b32 instructions are expected (v2..v5 into s[4:7]);
; SelectionDAG emits them from the highest register down, GlobalISel from the
; lowest up.
| define void @test_readfirstlane_v2i64(ptr addrspace(1) %out, <2 x i64> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v2i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:7] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v2i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:7] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src) |
| call void asm sideeffect "; use $0", "s"(<2 x i64> %x) |
| ret void |
| } |
| |
; readfirstlane of a <3 x i64> argument, consumed by an inline asm "s" operand
; (%out is unused). Six v_readfirstlane_b32 instructions are expected (v2..v7
; into s[4:9]); the selectors differ only in emission order.
| define void @test_readfirstlane_v3i64(ptr addrspace(1) %out, <3 x i64> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v3i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:9] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v3i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:9] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <3 x i64> @llvm.amdgcn.readfirstlane.v3i64(<3 x i64> %src) |
| call void asm sideeffect "; use $0", "s"(<3 x i64> %x) |
| ret void |
| } |
| |
; readfirstlane of a <4 x i64> argument, consumed by an inline asm "s" operand
; (%out is unused). Eight v_readfirstlane_b32 instructions are expected (v2..v9
; into s[4:11]); the selectors differ only in emission order.
| define void @test_readfirstlane_v4i64(ptr addrspace(1) %out, <4 x i64> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v4i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:11] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v4i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:11] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <4 x i64> @llvm.amdgcn.readfirstlane.v4i64(<4 x i64> %src) |
| call void asm sideeffect "; use $0", "s"(<4 x i64> %x) |
| ret void |
| } |
| |
; readfirstlane of a <8 x i64> argument, consumed by an inline asm "s" operand
; (%out is unused). Sixteen v_readfirstlane_b32 instructions are expected
; (v2..v17 into s[4:19]); the selectors differ only in emission order.
| define void @test_readfirstlane_v8i64(ptr addrspace(1) %out, <8 x i64> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v8i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:19] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v8i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:19] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <8 x i64> @llvm.amdgcn.readfirstlane.v8i64(<8 x i64> %src) |
| call void asm sideeffect "; use $0", "s"(<8 x i64> %x) |
| ret void |
| } |
| |
; readfirstlane of a double argument, stored through %out. The expected output
; has the same shape as the i64 case: two v_readfirstlane_b32 instructions (one
; per 32-bit half), with the selectors differing only in register assignment
; and order.
| define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_f64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4 |
| ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_f64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5 |
| ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %src) |
| store double %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of the i32 constant 32, consumed by an inline asm "s" operand.
; No v_readfirstlane_b32 is expected: both selectors materialize the constant
; with a single s_mov_b32.
| define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_mov_b32 s0, 32 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s0 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_imm_i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_mov_b32 s0, 32 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s0 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32) |
| call void asm sideeffect "; use $0", "s"(i32 %readfirstlane) |
| ret void |
| } |
| |
; readfirstlane of the i64 constant 32, consumed by an inline asm "s" operand.
; No v_readfirstlane_b32 is expected: both selectors materialize the constant
; with a single s_mov_b64 into s[0:1].
| define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_mov_b64 s[0:1], 32 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[0:1] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_imm_i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 32 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[0:1] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32) |
| call void asm sideeffect "; use $0", "s"(i64 %readfirstlane) |
| ret void |
| } |
| |
; readfirstlane of the double constant 32.0, consumed by an inline asm "s"
; operand. No v_readfirstlane_b32 is expected: both selectors build the value
; in s[0:1] with two s_mov_b32 instructions (0 and 0x40400000).
| define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_mov_b32 s0, 0 |
| ; CHECK-SDAG-NEXT: s_mov_b32 s1, 0x40400000 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[0:1] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_mov_b32 s0, 0 |
| ; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[0:1] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0) |
| call void asm sideeffect "; use $0", "s"(double %readfirstlane) |
| ret void |
| } |
| |
; readfirstlane of the i32 constant 32, stored through %out. The expected
; output contains no v_readfirstlane_b32 and no SGPR copy of the constant: both
; selectors move the immediate 32 straight into v2 for the store.
| define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, 32 |
| ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1 |
| ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, 32 |
| ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1 |
| ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32) |
| store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of the i64 constant 32, stored through %out. Neither selector
; is expected to emit v_readfirstlane_b32. SelectionDAG moves the immediates
; (32 and 0) directly into v[0:1]; GlobalISel first materializes the constant
; in s[2:3] with s_mov_b64 and then copies it into v[0:1].
| define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 32 |
| ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32 |
| ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32) |
| store i64 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of the double constant 32.0, stored through %out. Neither
; selector is expected to emit v_readfirstlane_b32. SelectionDAG moves the
; immediates (0 and 0x40400000) directly into v[0:1]; GlobalISel first builds
; the value in s2/s3 and then copies it into v[0:1].
| define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0x40400000 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-GISEL-NEXT: s_mov_b32 s2, 0 |
| ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0) |
| store double %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of an i32 that inline asm defines in m0 (constraint "={m0}"),
; stored through %out. The expected output contains no v_readfirstlane_b32:
; both selectors copy m0 into v2 with v_mov_b32 and store it.
| define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_m0: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: s_mov_b32 m0, -1 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, m0 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1 |
| ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_m0: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, m0 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1 |
| ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"() |
| %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %m0) |
| store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of an i32 that inline asm defines in an SGPR (constraint "=s"),
; stored through %out. The expected output contains no v_readfirstlane_b32:
; the SGPR (s2) is copied to v2 with v_mov_b32. The selectors differ only in
; the order of the v_mov_b32 instructions.
| define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: s_mov_b32 s2, 0 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2 |
| ; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: s_mov_b32 s2, 0 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s2 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1 |
| ; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2 |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"() |
| %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %sgpr) |
| store i32 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of an i64 that inline asm defines in an SGPR pair (constraint
; "=s"), stored through %out. The expected output contains no
; v_readfirstlane_b32: s[2:3] is copied to v[0:1] with v_mov_b32. The selectors
; differ only in instruction order.
| define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %sgpr = call i64 asm "s_mov_b64 $0, 0", "=s"() |
| %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %sgpr) |
| store i64 %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of a double that inline asm defines in an SGPR pair (constraint
; "=s"), stored through %out. The expected output contains no
; v_readfirstlane_b32: s[2:3] is copied to v[0:1] with v_mov_b32. The selectors
; differ only in instruction order.
| define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-SDAG-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| ; CHECK-GISEL-NEXT: s_add_i32 s12, s12, s17 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2 |
| ; CHECK-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| ; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1 |
| ; CHECK-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3 |
| ; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %sgpr = call double asm "s_mov_b64 $0, 0", "=s"() |
| %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %sgpr) |
| store double %readfirstlane, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
; readfirstlane of the address of a private (addrspace(5)) alloca, converted to
; i32 with ptrtoint and consumed by an inline asm "s" operand. The expected
; output contains no v_readfirstlane_b32: both selectors emit `s_mov_b32 s4, 0`
; for the operand.
| define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_fi: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-SDAG-NEXT: s_mov_b32 s4, 0 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s4 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_endpgm |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_fi: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-GISEL-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-GISEL-NEXT: s_mov_b32 s4, 0 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s4 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_endpgm |
| %alloca = alloca i32, addrspace(5) |
| %int = ptrtoint ptr addrspace(5) %alloca to i32 |
| %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %int) |
| call void asm sideeffect "; use $0", "s"(i32 %readfirstlane) |
| ret void |
| } |
| |
; readfirstlane of a half argument, consumed by an inline asm "s" operand
; (%out is unused). Both selectors are expected to emit a single
; v_readfirstlane_b32 on v2 with no extra masking.
| define void @test_readfirstlane_half(ptr addrspace(1) %out, half %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_half: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s4 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_half: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s4 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call half @llvm.amdgcn.readfirstlane.f16(half %src) |
| call void asm sideeffect "; use $0", "s"(half %x) |
| ret void |
| } |
| |
; readfirstlane of a float argument, consumed by an inline asm "s" operand
; (%out is unused). Both selectors are expected to emit a single
; v_readfirstlane_b32 on v2.
| define void @test_readfirstlane_float(ptr addrspace(1) %out, float %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_float: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s4 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_float: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s4 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call float @llvm.amdgcn.readfirstlane.f32(float %src) |
| call void asm sideeffect "; use $0", "s"(float %x) |
| ret void |
| } |
| |
; readfirstlane of a bfloat argument, consumed by an inline asm "s" operand
; (%out is unused). Both selectors are expected to emit a single
; v_readfirstlane_b32 on v2 with no extra masking.
| define void @test_readfirstlane_bfloat(ptr addrspace(1) %out, bfloat %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_bfloat: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s4 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_bfloat: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s4 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call bfloat @llvm.amdgcn.readfirstlane.bf16(bfloat %src) |
| call void asm sideeffect "; use $0", "s"(bfloat %x) |
| ret void |
| } |
| |
| ; Scalar i16: the result of llvm.amdgcn.readfirstlane.i16 is consumed by inline asm |
| ; through an "s" constraint. %out is never referenced in the body. Both runs expect a |
| ; v_readfirstlane_b32 from v2 into s4. The two runs differ afterwards: the SDAG run |
| ; additionally expects s_and_b32 s4, s4, 0xffff before the asm use, while the GISEL |
| ; run expects the asm use to follow the readfirstlane directly. |
| define void @test_readfirstlane_i16(ptr addrspace(1) %out, i16 %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_i16: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 0xffff |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s4 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_i16: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s4 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %src) |
| call void asm sideeffect "; use $0", "s"(i16 %x) |
| ret void |
| } |
| |
| ; <2 x half>: the result of llvm.amdgcn.readfirstlane.v2f16 is consumed by inline asm |
| ; through an "s" constraint. %out is never referenced in the body. Both the SDAG and |
| ; the GISEL run expect the whole vector to be handled by a single v_readfirstlane_b32 |
| ; from v2 into s4 (one register, not one per element). |
| define void @test_readfirstlane_v2f16(ptr addrspace(1) %out, <2 x half> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v2f16: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s4 |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v2f16: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s4 |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src) |
| call void asm sideeffect "; use $0", "s"(<2 x half> %x) |
| ret void |
| } |
| |
| ; <2 x float>: the result of llvm.amdgcn.readfirstlane.v2f32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:5]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2->s4, v3->s5); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v2f32(ptr addrspace(1) %out, <2 x float> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v2f32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:5] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v2f32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:5] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <2 x float> @llvm.amdgcn.readfirstlane.v2f32(<2 x float> %src) |
| call void asm sideeffect "; use $0", "s"(<2 x float> %x) |
| ret void |
| } |
| |
| ; <3 x float>: the result of llvm.amdgcn.readfirstlane.v3f32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:6]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v4 -> s4..s6); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v3f32(ptr addrspace(1) %out, <3 x float> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v3f32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:6] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v3f32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:6] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <3 x float> @llvm.amdgcn.readfirstlane.v3f32(<3 x float> %src) |
| call void asm sideeffect "; use $0", "s"(<3 x float> %x) |
| ret void |
| } |
| |
| ; <4 x float>: the result of llvm.amdgcn.readfirstlane.v4f32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:7]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v5 -> s4..s7); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v4f32(ptr addrspace(1) %out, <4 x float> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v4f32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:7] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v4f32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:7] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <4 x float> @llvm.amdgcn.readfirstlane.v4f32(<4 x float> %src) |
| call void asm sideeffect "; use $0", "s"(<4 x float> %x) |
| ret void |
| } |
| |
| ; <8 x float>: the result of llvm.amdgcn.readfirstlane.v8f32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:11]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v9 -> s4..s11); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v8f32(ptr addrspace(1) %out, <8 x float> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v8f32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:11] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v8f32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:11] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <8 x float> @llvm.amdgcn.readfirstlane.v8f32(<8 x float> %src) |
| call void asm sideeffect "; use $0", "s"(<8 x float> %x) |
| ret void |
| } |
| |
| ; <16 x float>: the result of llvm.amdgcn.readfirstlane.v16f32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:19]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v17 -> s4..s19); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v16f32(ptr addrspace(1) %out, <16 x float> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v16f32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:19] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v16f32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:19] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <16 x float> @llvm.amdgcn.readfirstlane.v16f32(<16 x float> %src) |
| call void asm sideeffect "; use $0", "s"(<16 x float> %x) |
| ret void |
| } |
| |
| ; <32 x float>: the result of llvm.amdgcn.readfirstlane.v32f32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[36:67]. %out is never referenced in the body. |
| ; Both the SDAG and the GISEL run expect: |
| ; - v31 stored to the stack (s32 offset:12) under s_xor_saveexec_b64 and reloaded from |
| ; the same slot before returning; |
| ; - s36-s39, s48-s55 and s64-s67 written to lanes 0-15 of v31 with v_writelane_b32 before |
| ; they are overwritten, and read back with v_readlane_b32 after the asm; |
| ; - three dwords loaded from the stack (s32 offsets 0, 4 and 8) that feed s65, s66 and |
| ; s67 respectively, each readfirstlane preceded by its own s_waitcnt vmcnt |
| ; (presumably the vector elements that did not fit in argument VGPRs; confirm against |
| ; the calling convention); |
| ; - the other 29 results (s36..s64) read directly from v2..v30. |
| ; The two runs differ in instruction order and in the VGPRs that receive the stack loads. |
| define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v32f32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 |
| ; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 |
| ; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s39, 3 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s48, 4 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s49, 5 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s50, 6 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s51, 7 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s52, 8 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s53, 9 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s54, 10 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s55, 11 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s64, 12 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 13 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s53, v19 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s52, v18 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s51, v17 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s50, v16 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s49, v15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s48, v14 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s39, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s38, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s37, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s36, v2 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s63, v29 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s62, v28 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s60, v26 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s59, v25 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s58, v24 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s57, v23 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s56, v22 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s47, v13 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s46, v12 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s45, v11 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s44, v10 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s43, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s42, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s41, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s40, v6 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s66, v0 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s67, v1 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s65, v27 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[36:67] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s67, v31, 15 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s66, v31, 14 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s65, v31, 13 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s64, v31, 12 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s55, v31, 11 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s54, v31, 10 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s53, v31, 9 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s52, v31, 8 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s51, v31, 7 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s50, v31, 6 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s49, v31, 5 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s48, v31, 4 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s39, v31, 3 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s38, v31, 2 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s37, v31, 1 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s36, v31, 0 |
| ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v32f32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s36, 0 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s48, 4 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s49, 5 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s50, 6 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s51, 7 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s52, 8 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s53, 9 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s54, 10 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s55, 11 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s64, 12 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s48, v14 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s49, v15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s50, v16 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s51, v17 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s52, v18 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s53, v19 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s54, v20 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s55, v21 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s64, v30 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s40, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s41, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s42, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s43, v9 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s44, v10 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s45, v11 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s46, v12 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s47, v13 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s56, v22 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s57, v23 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s58, v24 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s59, v25 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s60, v26 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s61, v27 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s62, v28 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s63, v29 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s65, v0 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s66, v1 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s67, v2 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[36:67] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s67, v31, 15 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s66, v31, 14 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s65, v31, 13 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s64, v31, 12 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s55, v31, 11 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s54, v31, 10 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s53, v31, 9 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s52, v31, 8 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s51, v31, 7 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s50, v31, 6 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s49, v31, 5 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s48, v31, 4 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s39, v31, 3 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s38, v31, 2 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s37, v31, 1 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s36, v31, 0 |
| ; CHECK-GISEL-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <32 x float> @llvm.amdgcn.readfirstlane.v32f32(<32 x float> %src) |
| call void asm sideeffect "; use $0", "s"(<32 x float> %x) |
| ret void |
| } |
| |
| ; <2 x i32>: the result of llvm.amdgcn.readfirstlane.v2i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:5]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2->s4, v3->s5); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v2i32(ptr addrspace(1) %out, <2 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v2i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:5] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v2i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:5] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<2 x i32> %x) |
| ret void |
| } |
| |
| ; <3 x i32>: the result of llvm.amdgcn.readfirstlane.v3i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:6]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v4 -> s4..s6); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v3i32(ptr addrspace(1) %out, <3 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v3i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:6] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v3i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:6] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<3 x i32> %x) |
| ret void |
| } |
| |
| ; <4 x i32>: the result of llvm.amdgcn.readfirstlane.v4i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:7]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v5 -> s4..s7); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v4i32(ptr addrspace(1) %out, <4 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v4i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:7] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v4i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:7] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<4 x i32> %x) |
| ret void |
| } |
| |
| ; <5 x i32>: the result of llvm.amdgcn.readfirstlane.v5i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:8]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v6 -> s4..s8); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v5i32(ptr addrspace(1) %out, <5 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v5i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:8] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v5i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:8] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <5 x i32> @llvm.amdgcn.readfirstlane.v5i32(<5 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<5 x i32> %x) |
| ret void |
| } |
| |
| ; <6 x i32>: the result of llvm.amdgcn.readfirstlane.v6i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:9]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v7 -> s4..s9); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v6i32(ptr addrspace(1) %out, <6 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v6i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:9] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v6i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:9] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <6 x i32> @llvm.amdgcn.readfirstlane.v6i32(<6 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<6 x i32> %x) |
| ret void |
| } |
| |
| ; <7 x i32>: the result of llvm.amdgcn.readfirstlane.v7i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:10]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v8 -> s4..s10); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v7i32(ptr addrspace(1) %out, <7 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v7i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:10] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v7i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:10] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <7 x i32> @llvm.amdgcn.readfirstlane.v7i32(<7 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<7 x i32> %x) |
| ret void |
| } |
| |
| ; <8 x i32>: the result of llvm.amdgcn.readfirstlane.v8i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:11]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v9 -> s4..s11); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v8i32(ptr addrspace(1) %out, <8 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v8i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:11] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v8i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:11] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<8 x i32> %x) |
| ret void |
| } |
| |
| ; <16 x i32>: the result of llvm.amdgcn.readfirstlane.v16i32 is consumed by inline asm |
| ; through an "s" constraint and printed as s[4:19]. %out is never referenced in the body. |
| ; Both runs expect one v_readfirstlane_b32 per element (v2..v17 -> s4..s19); the SDAG run |
| ; lists them from the highest register down, the GISEL run from the lowest up. |
| define void @test_readfirstlane_v16i32(ptr addrspace(1) %out, <16 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v16i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:19] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v16i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:19] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <16 x i32> @llvm.amdgcn.readfirstlane.v16i32(<16 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<16 x i32> %x) |
| ret void |
| } |
| |
; <32 x i32> llvm.amdgcn.readfirstlane whose result feeds inline asm through an
; "s" constraint; %out is not referenced in the body. Both selectors are
; expected to emit one v_readfirstlane_b32 per dword and hand s[36:67] to the
; asm. Three of the 32 readfirstlane inputs come from buffer_load_dword at s32
; offsets 0, 4 and 8, and each of those readfirstlanes sits behind its own
; s_waitcnt vmcnt. s36-s39, s48-s55 and s64-s67 are written to lanes 0-15 of
; v31 (v_writelane_b32) before being overwritten and read back
; (v_readlane_b32) after the asm; v31 itself is stored to s32 offset:12 on
; entry and reloaded before the return.
| define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v32i32: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 |
| ; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 |
| ; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s39, 3 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s48, 4 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s49, 5 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s50, 6 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s51, 7 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s52, 8 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s53, 9 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s54, 10 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s55, 11 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s64, 12 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 13 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 |
| ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s53, v19 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s52, v18 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s51, v17 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s50, v16 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s49, v15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s48, v14 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s39, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s38, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s37, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s36, v2 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s63, v29 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s62, v28 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s60, v26 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s59, v25 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s58, v24 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s57, v23 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s56, v22 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s47, v13 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s46, v12 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s45, v11 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s44, v10 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s43, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s42, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s41, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s40, v6 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s66, v0 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s67, v1 |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s65, v27 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[36:67] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s67, v31, 15 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s66, v31, 14 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s65, v31, 13 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s64, v31, 12 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s55, v31, 11 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s54, v31, 10 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s53, v31, 9 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s52, v31, 8 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s51, v31, 7 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s50, v31, 6 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s49, v31, 5 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s48, v31, 4 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s39, v31, 3 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s38, v31, 2 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s37, v31, 1 |
| ; CHECK-SDAG-NEXT: v_readlane_b32 s36, v31, 0 |
| ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v32i32: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s36, 0 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s48, 4 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s49, 5 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s50, 6 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s51, 7 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s52, 8 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s53, 9 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s54, 10 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s55, 11 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s64, 12 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 |
| ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s48, v14 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s49, v15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s50, v16 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s51, v17 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s52, v18 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s53, v19 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s54, v20 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s55, v21 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s64, v30 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s40, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s41, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s42, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s43, v9 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s44, v10 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s45, v11 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s46, v12 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s47, v13 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s56, v22 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s57, v23 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s58, v24 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s59, v25 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s60, v26 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s61, v27 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s62, v28 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s63, v29 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s65, v0 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s66, v1 |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s67, v2 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[36:67] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s67, v31, 15 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s66, v31, 14 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s65, v31, 13 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s64, v31, 12 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s55, v31, 11 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s54, v31, 10 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s53, v31, 9 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s52, v31, 8 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s51, v31, 7 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s50, v31, 6 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s49, v31, 5 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s48, v31, 4 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s39, v31, 3 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s38, v31, 2 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s37, v31, 1 |
| ; CHECK-GISEL-NEXT: v_readlane_b32 s36, v31, 0 |
| ; CHECK-GISEL-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; CHECK-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <32 x i32> @llvm.amdgcn.readfirstlane.v32i32(<32 x i32> %src) |
| call void asm sideeffect "; use $0", "s"(<32 x i32> %x) |
| ret void |
| } |
| |
; <8 x i16> llvm.amdgcn.readfirstlane feeding an "s"-constrained inline asm
; operand; %out is not referenced in the body. The expected code is four
; v_readfirstlane_b32 (v2-v5 into s4-s7) followed by "; use s[4:7]". The two
; selectors differ only in emission order: SelectionDAG counts down from
; v5/s7, GlobalISel counts up from v2/s4.
| define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v8i16: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:7] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v8i16: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:7] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <8 x i16> @llvm.amdgcn.readfirstlane.v8i16(<8 x i16> %src) |
| call void asm sideeffect "; use $0", "s"(<8 x i16> %x) |
| ret void |
| } |
| |
; <16 x i16> llvm.amdgcn.readfirstlane feeding an "s"-constrained inline asm
; operand; %out is not referenced in the body. The expected code is eight
; v_readfirstlane_b32 (v2-v9 into s4-s11) followed by "; use s[4:11]".
; SelectionDAG emits them from v9/s11 downwards, GlobalISel from v2/s4 upwards.
| define void @test_readfirstlane_v16i16(ptr addrspace(1) %out, <16 x i16> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v16i16: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:11] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v16i16: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:11] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <16 x i16> @llvm.amdgcn.readfirstlane.v16i16(<16 x i16> %src) |
| call void asm sideeffect "; use $0", "s"(<16 x i16> %x) |
| ret void |
| } |
| |
; <32 x i16> llvm.amdgcn.readfirstlane feeding an "s"-constrained inline asm
; operand; %out is not referenced in the body. The expected code is sixteen
; v_readfirstlane_b32 (v2-v17 into s4-s19) followed by "; use s[4:19]".
; SelectionDAG emits them from v17/s19 downwards, GlobalISel from v2/s4
; upwards.
| define void @test_readfirstlane_v32i16(ptr addrspace(1) %out, <32 x i16> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v32i16: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:19] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v32i16: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:19] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <32 x i16> @llvm.amdgcn.readfirstlane.v32i16(<32 x i16> %src) |
| call void asm sideeffect "; use $0", "s"(<32 x i16> %x) |
| ret void |
| } |
| |
| |
; <32 x half> llvm.amdgcn.readfirstlane feeding an "s"-constrained inline asm
; operand; %out is not referenced in the body. The expected instruction
; sequence matches the <32 x i16> test above: sixteen v_readfirstlane_b32
; (v2-v17 into s4-s19) followed by "; use s[4:19]", emitted downwards by
; SelectionDAG and upwards by GlobalISel.
| define void @test_readfirstlane_v32f16(ptr addrspace(1) %out, <32 x half> %src) { |
| ; CHECK-SDAG-LABEL: test_readfirstlane_v32f16: |
| ; CHECK-SDAG: ; %bb.0: |
| ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-SDAG-NEXT: ;;#ASMSTART |
| ; CHECK-SDAG-NEXT: ; use s[4:19] |
| ; CHECK-SDAG-NEXT: ;;#ASMEND |
| ; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-GISEL-LABEL: test_readfirstlane_v32f16: |
| ; CHECK-GISEL: ; %bb.0: |
| ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s11, v9 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s12, v10 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s13, v11 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s14, v12 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s15, v13 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s16, v14 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s17, v15 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s18, v16 |
| ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s19, v17 |
| ; CHECK-GISEL-NEXT: ;;#ASMSTART |
| ; CHECK-GISEL-NEXT: ; use s[4:19] |
| ; CHECK-GISEL-NEXT: ;;#ASMEND |
| ; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %x = call <32 x half> @llvm.amdgcn.readfirstlane.v32f16(<32 x half> %src) |
| call void asm sideeffect "; use $0", "s"(<32 x half> %x) |
| ret void |
| } |
| |