; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX802-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1010-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX1100-SDAG %s

; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX802-GISEL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX1010-GISEL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX1100-GISEL %s

; Declarations of the writelane intrinsic overloads exercised by this test.
; Review note (unconfirmed): the i32 tests call @llvm.amdgcn.writelane.i32,
; while the first declaration below uses the unmangled name; presumably the
; IR reader maps it to the .i32 overload - verify before renaming.
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
declare i64 @llvm.amdgcn.writelane.i64(i64, i32, i64) #0
declare double @llvm.amdgcn.writelane.f64(double, i32, double) #0

; i32 writelane with both the written value (%src0) and the lane select
; (%src1) supplied as kernel arguments. The previous value is loaded from
; %out, passed as the third intrinsic operand, and the result is stored back
; to %out. The expected output has a single v_writelane_b32 per target; on
; gfx802 the lane select is first copied into m0.
define amdgpu_kernel void @test_writelane_sreg_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_sreg_i32:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s2, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_sreg_i32:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dword s4, s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_sreg_i32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_sreg_i32:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s2, m0
; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_sreg_i32:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_sreg_i32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i32, ptr addrspace(1) %out
  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval)
  store i32 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i64 writelane with the written value (%src0) and the lane select (%src1)
; supplied as kernel arguments. The previous value is loaded from %out and the
; result is stored back to %out. The expected output contains two
; v_writelane_b32 instructions per target, one for each 32-bit half; the
; SelectionDAG and GlobalISel outputs differ only in the order of the halves.
define amdgpu_kernel void @test_writelane_sreg_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_sreg_i64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_sreg_i64:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_sreg_i64:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_sreg_i64:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_sreg_i64:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_sreg_i64:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i64, ptr addrspace(1) %out
  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval)
  store i64 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; double writelane with the written value (%src0) and the lane select (%src1)
; supplied as kernel arguments. The previous value is loaded from %out and the
; result is stored back to %out. The expected output contains two
; v_writelane_b32 instructions per target, one for each 32-bit half of the
; double.
define amdgpu_kernel void @test_writelane_sreg_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_sreg_f64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_sreg_f64:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_sreg_f64:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_sreg_f64:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_sreg_f64:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_sreg_f64:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load double, ptr addrspace(1) %out
  %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval)
  store double %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i32 writelane where the written value is the constant 32 and only the lane
; select (%src1) is a kernel argument. The previous value is loaded from %out
; and the result is stored back to %out. The expected output uses 32 directly
; as the v_writelane_b32 source operand on every target, with no copy into m0
; on gfx802.
define amdgpu_kernel void @test_writelane_imm_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i32:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-SDAG-NEXT:    v_writelane_b32 v2, 32, s2
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i32:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s3
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s3
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i32:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-GISEL-NEXT:    v_writelane_b32 v2, 32, s2
; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i32:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s3
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s3
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i32, ptr addrspace(1) %out
  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 32, i32 %src1, i32 %oldval)
  store i32 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i64 writelane where the written value is the constant 32 and only the lane
; select (%src1) is a kernel argument. The previous value is loaded from %out
; and the result is stored back to %out. The expected output splits the
; constant into two v_writelane_b32 instructions: 32 for the low half (v0) and
; 0 for the high half (v1).
define amdgpu_kernel void @test_writelane_imm_sreg_i64(ptr addrspace(1) %out, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX802-SDAG-NEXT:    s_add_i32 s12, s12, s17
; GFX802-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX802-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
; GFX802-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i64:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i64:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x8
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i64:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX802-GISEL-NEXT:    s_add_i32 s12, s12, s17
; GFX802-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
; GFX802-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
; GFX802-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i64:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i64:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x8
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i64, ptr addrspace(1) %out
  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 32, i32 %src1, i64 %oldval)
  store i64 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Writes the f64 immediate 32.0 into the lane selected by the scalar kernel
; argument %src1, using the double loaded from %out as the old value, and
; stores the result back to %out.
; The checks below expect the 64-bit write to be emitted as two
; v_writelane_b32 instructions: the low dword uses the inline constant 0 and
; the high dword (0x40400000, the upper half of 32.0) is first moved into an
; SGPR. In the gfx802 checks the lane select of the SGPR-sourced write is
; passed through m0.
526define amdgpu_kernel void @test_writelane_imm_sreg_f64(ptr addrspace(1) %out, i32 %src1) #1 {
527; GFX802-SDAG-LABEL: test_writelane_imm_sreg_f64:
528; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500529; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
530; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
Jun Wang31f39c82025-04-15 15:17:33 -0700531; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530532; GFX802-SDAG-NEXT: s_mov_b32 s5, 0x40400000
Jun Wang31f39c82025-04-15 15:17:33 -0700533; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530534; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
535; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
536; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
537; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
Jun Wang31f39c82025-04-15 15:17:33 -0700538; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530539; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
540; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
541; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
542; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
543; GFX802-SDAG-NEXT: v_writelane_b32 v1, s5, m0
544; GFX802-SDAG-NEXT: v_writelane_b32 v0, 0, s4
545; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
546; GFX802-SDAG-NEXT: s_endpgm
547;
548; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_f64:
549; GFX1010-SDAG: ; %bb.0:
550; GFX1010-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -0500551; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
552; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530553; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
554; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
555; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
556; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
557; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3
558; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2
559; GFX1010-SDAG-NEXT: s_mov_b32 s2, 0x40400000
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400560; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s2, s4
561; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530562; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
563; GFX1010-SDAG-NEXT: s_endpgm
564;
565; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_f64:
566; GFX1100-SDAG: ; %bb.0:
567; GFX1100-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -0500568; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
569; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530570; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
571; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400572; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530573; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400574; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3
575; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
576; GFX1100-SDAG-NEXT: s_mov_b32 s2, 0x40400000
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530577; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400578; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s2, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530579; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 0, s4
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400580; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530581; GFX1100-SDAG-NEXT: s_endpgm
582;
583; GFX802-GISEL-LABEL: test_writelane_imm_sreg_f64:
584; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500585; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
586; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x8
Jun Wang31f39c82025-04-15 15:17:33 -0700587; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530588; GFX802-GISEL-NEXT: s_mov_b32 s5, 0x40400000
Jun Wang31f39c82025-04-15 15:17:33 -0700589; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530590; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
591; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
592; GFX802-GISEL-NEXT: s_mov_b32 m0, s4
593; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
Jun Wang31f39c82025-04-15 15:17:33 -0700594; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530595; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
596; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
597; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
598; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
599; GFX802-GISEL-NEXT: v_writelane_b32 v0, 0, s4
600; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0
601; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
602; GFX802-GISEL-NEXT: s_endpgm
603;
604; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_f64:
605; GFX1010-GISEL: ; %bb.0:
606; GFX1010-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -0500607; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
608; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530609; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
610; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
611; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
612; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
613; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s2
614; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s3
615; GFX1010-GISEL-NEXT: s_mov_b32 s2, 0x40400000
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400616; GFX1010-GISEL-NEXT: v_writelane_b32 v0, 0, s4
617; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s2, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530618; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
619; GFX1010-GISEL-NEXT: s_endpgm
620;
621; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_f64:
622; GFX1100-GISEL: ; %bb.0:
623; GFX1100-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -0500624; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
625; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530626; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
627; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400628; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530629; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400630; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s2
631; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s3
632; GFX1100-GISEL-NEXT: s_mov_b32 s2, 0x40400000
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530633; GFX1100-GISEL-NEXT: v_writelane_b32 v0, 0, s4
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400634; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s2, s4
635; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530636; GFX1100-GISEL-NEXT: s_endpgm
637 %oldval = load double, ptr addrspace(1) %out
638 %writelane = call double @llvm.amdgcn.writelane.f64(double 32.0, i32 %src1, double %oldval)
639 store double %writelane, ptr addrspace(1) %out, align 4
640 ret void
641}
642
; Writes the i32 immediate 12 into a lane whose index is loaded per work-item
; (element 1 of the <2 x i32> at %in indexed by workitem.id.x), using the i32
; loaded from %out as the old value, and stores the result back to %out.
; The lane index is produced by a vector load into a VGPR, so the checks below
; expect a v_readfirstlane_b32 into an SGPR ahead of the v_writelane_b32.
643define amdgpu_kernel void @test_writelane_vreg_lane_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
644; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i32:
645; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500646; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530647; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
Jun Wang31f39c82025-04-15 15:17:33 -0700648; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
649; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
650; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530651; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
652; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
653; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0
654; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
655; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 4, v0
656; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
657; GFX802-SDAG-NEXT: flat_load_dword v0, v[0:1]
658; GFX802-SDAG-NEXT: s_load_dword s2, s[0:1], 0x0
659; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
660; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s2
661; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
662; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v0
663; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
664; GFX802-SDAG-NEXT: s_nop 2
665; GFX802-SDAG-NEXT: v_writelane_b32 v2, 12, s2
666; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
667; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
668; GFX802-SDAG-NEXT: s_endpgm
669;
670; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i32:
671; GFX1010-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500672; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530673; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
674; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
675; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:4
676; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3
677; GFX1010-SDAG-NEXT: s_load_dword s2, s[0:1], 0x0
678; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
679; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s2
680; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
681; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s2, v0
682; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 0
683; GFX1010-SDAG-NEXT: v_writelane_b32 v1, 12, s2
684; GFX1010-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
685; GFX1010-SDAG-NEXT: s_endpgm
686;
687; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i32:
688; GFX1100-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500689; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400690; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
691; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530692; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
693; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
694; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:4
695; GFX1100-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x0
696; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
697; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s2
698; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
699; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v0
700; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
701; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
702; GFX1100-SDAG-NEXT: v_writelane_b32 v1, 12, s2
703; GFX1100-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530704; GFX1100-SDAG-NEXT: s_endpgm
705;
706; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i32:
707; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500708; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530709; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
Jun Wang31f39c82025-04-15 15:17:33 -0700710; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
711; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
712; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530713; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
714; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
715; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
716; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
717; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
718; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 4, v0
719; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
720; GFX802-GISEL-NEXT: flat_load_dword v0, v[0:1]
721; GFX802-GISEL-NEXT: s_load_dword s2, s[0:1], 0x0
722; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
723; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2
724; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
725; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0
726; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
727; GFX802-GISEL-NEXT: s_nop 2
728; GFX802-GISEL-NEXT: v_writelane_b32 v2, 12, s2
729; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
730; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
731; GFX802-GISEL-NEXT: s_endpgm
732;
733; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i32:
734; GFX1010-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500735; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530736; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
737; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
738; GFX1010-GISEL-NEXT: global_load_dword v0, v0, s[2:3] offset:4
739; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3
740; GFX1010-GISEL-NEXT: s_load_dword s2, s[0:1], 0x0
741; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
742; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2
743; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
744; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0
745; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
746; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 12, s2
747; GFX1010-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
748; GFX1010-GISEL-NEXT: s_endpgm
749;
750; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i32:
751; GFX1100-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500752; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400753; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
754; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530755; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
756; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
757; GFX1100-GISEL-NEXT: global_load_b32 v0, v0, s[2:3] offset:4
758; GFX1100-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x0
759; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
760; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2
761; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
762; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0
763; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
764; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
765; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 12, s2
766; GFX1100-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530767; GFX1100-GISEL-NEXT: s_endpgm
Tim Renouf2a99fa22018-02-28 19:10:32 +0000768 %tid = call i32 @llvm.amdgcn.workitem.id.x()
Matt Arsenaultad386a82022-11-28 14:13:14 -0500769 %gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid
770 %args = load <2 x i32>, ptr addrspace(1) %gep.in
771 %oldval = load i32, ptr addrspace(1) %out
Tim Renouf2a99fa22018-02-28 19:10:32 +0000772 %lane = extractelement <2 x i32> %args, i32 1
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530773 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 12, i32 %lane, i32 %oldval)
Matt Arsenaultad386a82022-11-28 14:13:14 -0500774 store i32 %writelane, ptr addrspace(1) %out, align 4
Tim Renouf2a99fa22018-02-28 19:10:32 +0000775 ret void
776}
777
; 64-bit integer variant of the per-work-item lane test: writes the i64
; immediate 12 into a lane whose index is element 1 of the <2 x i64> loaded from
; %in (indexed by workitem.id.x) truncated to i32, using the i64 loaded from
; %out as the old value, and stores the result back to %out.
; The checks below expect one v_readfirstlane_b32 for the lane index followed by
; two v_writelane_b32 instructions (12 for the low dword, 0 for the high dword).
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530778define amdgpu_kernel void @test_writelane_vreg_lane_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
779; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i64:
780; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500781; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530782; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
Jun Wang31f39c82025-04-15 15:17:33 -0700783; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
784; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
785; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530786; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
787; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
788; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0
789; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
790; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 8, v0
791; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
792; GFX802-SDAG-NEXT: flat_load_dword v2, v[0:1]
793; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
794; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
795; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
796; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
797; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
798; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v2
799; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
800; GFX802-SDAG-NEXT: s_nop 2
801; GFX802-SDAG-NEXT: v_writelane_b32 v1, 0, s2
802; GFX802-SDAG-NEXT: v_writelane_b32 v0, 12, s2
803; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
804; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
805; GFX802-SDAG-NEXT: s_endpgm
806;
807; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i64:
808; GFX1010-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500809; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530810; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
811; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
812; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
813; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:8
814; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3
815; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
816; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
817; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3
818; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
819; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s3, v0
820; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2
821; GFX1010-SDAG-NEXT: v_writelane_b32 v1, 0, s3
822; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 12, s3
823; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
824; GFX1010-SDAG-NEXT: s_endpgm
825;
826; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i64:
827; GFX1100-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500828; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400829; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
dyungadaff462024-07-14 18:48:54 -0700830; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400831; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
832; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530833; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
834; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:8
835; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
836; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
837; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3
838; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
839; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v0
840; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
841; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
842; GFX1100-SDAG-NEXT: v_writelane_b32 v1, 0, s3
843; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 12, s3
844; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530845; GFX1100-SDAG-NEXT: s_endpgm
846;
847; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i64:
848; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500849; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530850; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v0
Jun Wang31f39c82025-04-15 15:17:33 -0700851; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
852; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
853; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530854; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
855; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
856; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
857; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
858; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
859; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 8, v0
860; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
861; GFX802-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
862; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
863; GFX802-GISEL-NEXT: v_mov_b32_e32 v4, s1
864; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s0
865; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
866; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s2
867; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
868; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0
869; GFX802-GISEL-NEXT: s_nop 3
870; GFX802-GISEL-NEXT: v_writelane_b32 v1, 12, s2
871; GFX802-GISEL-NEXT: v_writelane_b32 v2, 0, s2
872; GFX802-GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
873; GFX802-GISEL-NEXT: s_endpgm
874;
875; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i64:
876; GFX1010-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500877; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530878; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
879; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
880; GFX1010-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:8
881; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3
882; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
883; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
884; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2
885; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, s3
886; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0
887; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
888; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 12, s2
889; GFX1010-GISEL-NEXT: v_writelane_b32 v2, 0, s2
890; GFX1010-GISEL-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
891; GFX1010-GISEL-NEXT: s_endpgm
892;
893; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i64:
894; GFX1100-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500895; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400896; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
897; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530898; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
899; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
900; GFX1100-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:8
901; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
902; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
903; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2
904; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, s3
905; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0
906; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
907; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
908; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 12, s2
909; GFX1100-GISEL-NEXT: v_writelane_b32 v2, 0, s2
910; GFX1100-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530911; GFX1100-GISEL-NEXT: s_endpgm
912 %tid = call i32 @llvm.amdgcn.workitem.id.x()
913 %gep.in = getelementptr <2 x i64>, ptr addrspace(1) %in, i32 %tid
914 %args = load <2 x i64>, ptr addrspace(1) %gep.in
915 %oldval = load i64, ptr addrspace(1) %out
916 %lane = extractelement <2 x i64> %args, i32 1
; The intrinsic's lane-select operand is i32, so narrow the loaded i64 first.
917 %lane32 = trunc i64 %lane to i32
918 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 12, i32 %lane32, i64 %oldval)
919 store i64 %writelane, ptr addrspace(1) %out, align 4
920 ret void
921}
922
; f64 variant of the per-work-item lane test: writes the double immediate 12.0
; into a lane whose index is the low 32 bits of element 1 of the <2 x double>
; loaded from %in (indexed by workitem.id.x), using the double loaded from %out
; as the old value, and stores the result back to %out.
; The checks below expect two v_writelane_b32 instructions: the low dword uses
; the inline constant 0 and the high dword (0x40280000, the upper half of 12.0)
; is first moved into an SGPR. In the gfx802 checks the lane index read by
; v_readfirstlane_b32 is additionally copied into m0 for the SGPR-sourced write.
923define amdgpu_kernel void @test_writelane_vreg_lane_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
924; GFX802-SDAG-LABEL: test_writelane_vreg_lane_f64:
925; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500926; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530927; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
Jun Wang31f39c82025-04-15 15:17:33 -0700928; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
929; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
930; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530931; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
932; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
933; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0
934; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
935; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 8, v0
936; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
937; GFX802-SDAG-NEXT: flat_load_dword v2, v[0:1]
938; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -0700939; GFX802-SDAG-NEXT: s_mov_b32 s4, 0x40280000
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530940; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530941; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
Pierre van Houtryve52317362025-02-26 13:14:03 +0100942; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530943; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530944; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v2
Pierre van Houtryve52317362025-02-26 13:14:03 +0100945; GFX802-SDAG-NEXT: s_mov_b32 m0, s2
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530946; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
947; GFX802-SDAG-NEXT: s_nop 1
Pierre van Houtryve52317362025-02-26 13:14:03 +0100948; GFX802-SDAG-NEXT: v_writelane_b32 v0, 0, s2
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530949; GFX802-SDAG-NEXT: v_writelane_b32 v1, s4, m0
950; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530951; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
952; GFX802-SDAG-NEXT: s_endpgm
953;
954; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_f64:
955; GFX1010-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500956; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530957; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
958; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
959; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
960; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:8
961; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3
962; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
963; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
964; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3
965; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
966; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s3, v0
967; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2
968; GFX1010-SDAG-NEXT: s_mov_b32 s2, 0x40280000
969; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s2, s3
970; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 0, s3
971; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
972; GFX1010-SDAG-NEXT: s_endpgm
973;
974; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_f64:
975; GFX1100-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500976; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400977; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
dyungadaff462024-07-14 18:48:54 -0700978; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +0400979; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
980; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530981; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
982; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:8
983; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
984; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
985; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3
986; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
987; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v0
988; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2
989; GFX1100-SDAG-NEXT: s_mov_b32 s2, 0x40280000
Ana Mihajlovic459b4e32025-03-13 10:26:20 +0100990; GFX1100-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530991; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s2, s3
992; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 0, s3
993; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530994; GFX1100-SDAG-NEXT: s_endpgm
995;
996; GFX802-GISEL-LABEL: test_writelane_vreg_lane_f64:
997; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -0500998; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +0530999; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v0
Jun Wang31f39c82025-04-15 15:17:33 -07001000; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
1001; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1002; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301003; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1004; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2
1005; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3
1006; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1007; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1008; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 8, v0
1009; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1010; GFX802-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1011; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001012; GFX802-GISEL-NEXT: s_mov_b32 s4, 0x40280000
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301013; GFX802-GISEL-NEXT: v_mov_b32_e32 v4, s1
1014; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s0
1015; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1016; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s2
1017; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
1018; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0
1019; GFX802-GISEL-NEXT: s_mov_b32 m0, s2
1020; GFX802-GISEL-NEXT: s_nop 2
1021; GFX802-GISEL-NEXT: v_writelane_b32 v1, 0, s2
1022; GFX802-GISEL-NEXT: v_writelane_b32 v2, s4, m0
1023; GFX802-GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
1024; GFX802-GISEL-NEXT: s_endpgm
1025;
1026; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_f64:
1027; GFX1010-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001028; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301029; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
1030; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1031; GFX1010-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:8
1032; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3
1033; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
1034; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1035; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2
1036; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, s3
1037; GFX1010-GISEL-NEXT: s_mov_b32 s3, 0x40280000
1038; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0
1039; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
1040; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 0, s2
1041; GFX1010-GISEL-NEXT: v_writelane_b32 v2, s3, s2
1042; GFX1010-GISEL-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
1043; GFX1010-GISEL-NEXT: s_endpgm
1044;
1045; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_f64:
1046; GFX1100-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001047; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001048; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1049; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301050; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
1051; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1052; GFX1100-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:8
1053; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
1054; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1055; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2
1056; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, s3
1057; GFX1100-GISEL-NEXT: s_mov_b32 s3, 0x40280000
1058; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0
1059; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
1060; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1061; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 0, s2
1062; GFX1100-GISEL-NEXT: v_writelane_b32 v2, s3, s2
1063; GFX1100-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301064; GFX1100-GISEL-NEXT: s_endpgm
1065 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1066 %gep.in = getelementptr <2 x double>, ptr addrspace(1) %in, i32 %tid
1067 %args = load <2 x double>, ptr addrspace(1) %gep.in
1068 %oldval = load double, ptr addrspace(1) %out
1069 %lane = extractelement <2 x double> %args, i32 1
; The lane select must be i32: reinterpret the double's bits as i64, then keep
; the low 32 bits.
1070 %lane_cast = bitcast double %lane to i64
1071 %lane32 = trunc i64 %lane_cast to i32
1072 %writelane = call double @llvm.amdgcn.writelane.f64(double 12.0, i32 %lane32, double %oldval)
1073 store double %writelane, ptr addrspace(1) %out, align 4
1074 ret void
1075}
1076
1077define amdgpu_kernel void @test_writelane_m0_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 {
1078; GFX802-SDAG-LABEL: test_writelane_m0_sreg_i32:
1079; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001080; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1081; GFX802-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301082; GFX802-SDAG-NEXT: ;;#ASMSTART
1083; GFX802-SDAG-NEXT: s_mov_b32 m0, -1
1084; GFX802-SDAG-NEXT: ;;#ASMEND
Jun Wang31f39c82025-04-15 15:17:33 -07001085; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301086; GFX802-SDAG-NEXT: s_mov_b32 s4, m0
1087; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1088; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1089; GFX802-SDAG-NEXT: s_mov_b32 m0, s2
1090; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
Jun Wang31f39c82025-04-15 15:17:33 -07001091; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
1092; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301093; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1094; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3
1095; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0
Jun Wang31f39c82025-04-15 15:17:33 -07001096; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301097; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1098; GFX802-SDAG-NEXT: s_endpgm
1099;
1100; GFX1010-SDAG-LABEL: test_writelane_m0_sreg_i32:
1101; GFX1010-SDAG: ; %bb.0:
1102; GFX1010-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001103; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1104; GFX1010-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301105; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1106; GFX1010-SDAG-NEXT: ;;#ASMSTART
1107; GFX1010-SDAG-NEXT: s_mov_b32 m0, -1
1108; GFX1010-SDAG-NEXT: ;;#ASMEND
1109; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1110; GFX1010-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1111; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1112; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s3
1113; GFX1010-SDAG-NEXT: v_writelane_b32 v0, m0, s2
1114; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1115; GFX1010-SDAG-NEXT: s_endpgm
1116;
1117; GFX1100-SDAG-LABEL: test_writelane_m0_sreg_i32:
1118; GFX1100-SDAG: ; %bb.0:
1119; GFX1100-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001120; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1121; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301122; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1123; GFX1100-SDAG-NEXT: ;;#ASMSTART
1124; GFX1100-SDAG-NEXT: s_mov_b32 m0, -1
1125; GFX1100-SDAG-NEXT: ;;#ASMEND
1126; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001127; GFX1100-SDAG-NEXT: s_load_b32 s3, s[0:1], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301128; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001129; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s3
1130; GFX1100-SDAG-NEXT: v_writelane_b32 v0, m0, s2
1131; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301132; GFX1100-SDAG-NEXT: s_endpgm
1133;
1134; GFX802-GISEL-LABEL: test_writelane_m0_sreg_i32:
1135; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001136; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1137; GFX802-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301138; GFX802-GISEL-NEXT: ;;#ASMSTART
1139; GFX802-GISEL-NEXT: s_mov_b32 m0, -1
1140; GFX802-GISEL-NEXT: ;;#ASMEND
Jun Wang31f39c82025-04-15 15:17:33 -07001141; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301142; GFX802-GISEL-NEXT: s_mov_b32 s4, m0
1143; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1144; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1145; GFX802-GISEL-NEXT: s_mov_b32 m0, s2
1146; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
Jun Wang31f39c82025-04-15 15:17:33 -07001147; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1148; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301149; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1150; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
1151; GFX802-GISEL-NEXT: v_writelane_b32 v2, s4, m0
Jun Wang31f39c82025-04-15 15:17:33 -07001152; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301153; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1154; GFX802-GISEL-NEXT: s_endpgm
1155;
1156; GFX1010-GISEL-LABEL: test_writelane_m0_sreg_i32:
1157; GFX1010-GISEL: ; %bb.0:
1158; GFX1010-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001159; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1160; GFX1010-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301161; GFX1010-GISEL-NEXT: ;;#ASMSTART
1162; GFX1010-GISEL-NEXT: s_mov_b32 m0, -1
1163; GFX1010-GISEL-NEXT: ;;#ASMEND
1164; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1165; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1166; GFX1010-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1167; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1168; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s3
1169; GFX1010-GISEL-NEXT: v_writelane_b32 v0, m0, s2
1170; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1171; GFX1010-GISEL-NEXT: s_endpgm
1172;
1173; GFX1100-GISEL-LABEL: test_writelane_m0_sreg_i32:
1174; GFX1100-GISEL: ; %bb.0:
1175; GFX1100-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001176; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1177; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301178; GFX1100-GISEL-NEXT: ;;#ASMSTART
1179; GFX1100-GISEL-NEXT: s_mov_b32 m0, -1
1180; GFX1100-GISEL-NEXT: ;;#ASMEND
1181; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1182; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001183; GFX1100-GISEL-NEXT: s_load_b32 s3, s[0:1], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301184; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001185; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s3
1186; GFX1100-GISEL-NEXT: v_writelane_b32 v0, m0, s2
1187; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301188; GFX1100-GISEL-NEXT: s_endpgm
Matt Arsenaultad386a82022-11-28 14:13:14 -05001189 %oldval = load i32, ptr addrspace(1) %out
Matt Arsenaultbb0a6102019-06-14 21:16:06 +00001190 %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301191 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %m0, i32 %src1, i32 %oldval)
Matt Arsenaultad386a82022-11-28 14:13:14 -05001192 store i32 %writelane, ptr addrspace(1) %out, align 4
Tim Renouf2a99fa22018-02-28 19:10:32 +00001193 ret void
1194}
1195
; Purpose: i32 writelane with a constant lane-select operand.
; The kernel loads the previous value from %out, calls the writelane intrinsic
; with %src0 as the first operand, the literal 32 as the second operand and the
; loaded value as the third operand, then stores the result back to %out.
; Every expected sequence below contains exactly one
; "v_writelane_b32 <vgpr>, s2, 32", i.e. the constant 32 stays an immediate
; operand for all three -mcpu values and for both instruction selectors.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them with that script rather than editing them by hand.
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301196define amdgpu_kernel void @test_writelane_imm_i32(ptr addrspace(1) %out, i32 %src0) #1 {
1197; GFX802-SDAG-LABEL: test_writelane_imm_i32:
1198; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001199; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1200; GFX802-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
Jun Wang31f39c82025-04-15 15:17:33 -07001201; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
1202; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
1203; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301204; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1205; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1206; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
1207; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
1208; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1209; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3
1210; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, 32
1211; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1212; GFX802-SDAG-NEXT: s_endpgm
1213;
1214; GFX1010-SDAG-LABEL: test_writelane_imm_i32:
1215; GFX1010-SDAG: ; %bb.0:
1216; GFX1010-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001217; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1218; GFX1010-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301219; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1220; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1221; GFX1010-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0
1222; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1223; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s3
1224; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1225; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1226; GFX1010-SDAG-NEXT: s_endpgm
1227;
1228; GFX1100-SDAG-LABEL: test_writelane_imm_i32:
1229; GFX1100-SDAG: ; %bb.0:
1230; GFX1100-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001231; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1232; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301233; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1234; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001235; GFX1100-SDAG-NEXT: s_load_b32 s3, s[0:1], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301236; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001237; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s3
1238; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1239; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301240; GFX1100-SDAG-NEXT: s_endpgm
1241;
1242; GFX802-GISEL-LABEL: test_writelane_imm_i32:
1243; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001244; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1245; GFX802-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
Jun Wang31f39c82025-04-15 15:17:33 -07001246; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
1247; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1248; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301249; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1250; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1251; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1252; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1253; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1254; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3
1255; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, 32
1256; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1257; GFX802-GISEL-NEXT: s_endpgm
1258;
1259; GFX1010-GISEL-LABEL: test_writelane_imm_i32:
1260; GFX1010-GISEL: ; %bb.0:
1261; GFX1010-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001262; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1263; GFX1010-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301264; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1265; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1266; GFX1010-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0
1267; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1268; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s3
1269; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1270; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1271; GFX1010-GISEL-NEXT: s_endpgm
1272;
1273; GFX1100-GISEL-LABEL: test_writelane_imm_i32:
1274; GFX1100-GISEL: ; %bb.0:
1275; GFX1100-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001276; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
1277; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301278; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1279; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001280; GFX1100-GISEL-NEXT: s_load_b32 s3, s[0:1], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301281; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001282; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s3
1283; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1284; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301285; GFX1100-GISEL-NEXT: s_endpgm
; IR under test: load the old value, writelane with lane-select 32, store back.
Matt Arsenaultad386a82022-11-28 14:13:14 -05001286 %oldval = load i32, ptr addrspace(1) %out
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301287 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 32, i32 %oldval) #0
Matt Arsenaultad386a82022-11-28 14:13:14 -05001288 store i32 %writelane, ptr addrspace(1) %out, align 4
Tim Renouf2a99fa22018-02-28 19:10:32 +00001289 ret void
1290}
1291
; Purpose: i64 writelane with a constant lane-select operand.
; The kernel loads the previous 64-bit value from %out, calls the i64 writelane
; intrinsic with %src0, the literal 32 and the loaded value, then stores the
; result back to %out.
; Every expected sequence below contains two v_writelane_b32 instructions, one
; per 32-bit register of the pair (v0 with s2, v1 with s3), both using the
; immediate 32. The SelectionDAG expectations list the v1/s3 write first; the
; GlobalISel expectations list the v0/s2 write first.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them with that script rather than editing them by hand.
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301292define amdgpu_kernel void @test_writelane_imm_i64(ptr addrspace(1) %out, i64 %src0) #1 {
1293; GFX802-SDAG-LABEL: test_writelane_imm_i64:
1294; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001295; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001296; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
1297; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
1298; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301299; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1300; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1301; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1302; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1303; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1304; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5
1305; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4
1306; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1307; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1308; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1309; GFX802-SDAG-NEXT: s_endpgm
1310;
1311; GFX1010-SDAG-LABEL: test_writelane_imm_i64:
1312; GFX1010-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001313; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301314; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1315; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1316; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1317; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1318; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5
1319; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
1320; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1321; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1322; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1323; GFX1010-SDAG-NEXT: s_endpgm
1324;
1325; GFX1100-SDAG-LABEL: test_writelane_imm_i64:
1326; GFX1100-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001327; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301328; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1329; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1330; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1331; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1332; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5
1333; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4
1334; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1335; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1336; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301337; GFX1100-SDAG-NEXT: s_endpgm
1338;
1339; GFX802-GISEL-LABEL: test_writelane_imm_i64:
1340; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001341; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001342; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
1343; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1344; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301345; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1346; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1347; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
1348; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
1349; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1350; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4
1351; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5
1352; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1353; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1354; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1355; GFX802-GISEL-NEXT: s_endpgm
1356;
1357; GFX1010-GISEL-LABEL: test_writelane_imm_i64:
1358; GFX1010-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001359; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301360; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1361; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1362; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1363; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1364; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
1365; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5
1366; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1367; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1368; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1369; GFX1010-GISEL-NEXT: s_endpgm
1370;
1371; GFX1100-GISEL-LABEL: test_writelane_imm_i64:
1372; GFX1100-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001373; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301374; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1375; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1376; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1377; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1378; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4
1379; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5
1380; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1381; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1382; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301383; GFX1100-GISEL-NEXT: s_endpgm
; IR under test: load the old i64, writelane with lane-select 32, store back.
1384 %oldval = load i64, ptr addrspace(1) %out
1385 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 32, i64 %oldval) #0
1386 store i64 %writelane, ptr addrspace(1) %out, align 4
1387 ret void
1388}
1389
; Purpose: double (f64) writelane with a constant lane-select operand.
; The kernel loads the previous double from %out, calls the f64 writelane
; intrinsic with %src0, the literal 32 and the loaded value, then stores the
; result back to %out.
; Every expected sequence below contains two v_writelane_b32 instructions, one
; per 32-bit register of the pair (v0 with s2, v1 with s3), both using the
; immediate 32. The SelectionDAG expectations list the v1/s3 write first; the
; GlobalISel expectations list the v0/s2 write first.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them with that script rather than editing them by hand.
1390define amdgpu_kernel void @test_writelane_imm_f64(ptr addrspace(1) %out, double %src0) #1 {
1391; GFX802-SDAG-LABEL: test_writelane_imm_f64:
1392; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001393; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001394; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
1395; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
1396; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301397; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1398; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1399; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1400; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
1401; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1402; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5
1403; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4
1404; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1405; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1406; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1407; GFX802-SDAG-NEXT: s_endpgm
1408;
1409; GFX1010-SDAG-LABEL: test_writelane_imm_f64:
1410; GFX1010-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001411; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301412; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1413; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1414; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1415; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1416; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5
1417; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
1418; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1419; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1420; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1421; GFX1010-SDAG-NEXT: s_endpgm
1422;
1423; GFX1100-SDAG-LABEL: test_writelane_imm_f64:
1424; GFX1100-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001425; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301426; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1427; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1428; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1429; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1430; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5
1431; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4
1432; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, 32
1433; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32
1434; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301435; GFX1100-SDAG-NEXT: s_endpgm
1436;
1437; GFX802-GISEL-LABEL: test_writelane_imm_f64:
1438; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001439; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001440; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
1441; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1442; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301443; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1444; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1445; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
1446; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
1447; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1448; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4
1449; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5
1450; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1451; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1452; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1453; GFX802-GISEL-NEXT: s_endpgm
1454;
1455; GFX1010-GISEL-LABEL: test_writelane_imm_f64:
1456; GFX1010-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001457; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301458; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1459; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1460; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
1461; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1462; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
1463; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5
1464; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1465; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1466; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1467; GFX1010-GISEL-NEXT: s_endpgm
1468;
1469; GFX1100-GISEL-LABEL: test_writelane_imm_f64:
1470; GFX1100-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001471; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301472; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1473; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1474; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
1475; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1476; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4
1477; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5
1478; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32
1479; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, 32
1480; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301481; GFX1100-GISEL-NEXT: s_endpgm
; IR under test: load the old double, writelane with lane-select 32, store back.
1482 %oldval = load double, ptr addrspace(1) %out
1483 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 32, double %oldval) #0
1484 store double %writelane, ptr addrspace(1) %out, align 4
1485 ret void
1486}
1487
; Purpose: i32 writelane where the old value, the written value and the lane
; select are all kernel arguments (%oldval, %src0, %src1); nothing is loaded
; from %out before the intrinsic call, and the result is stored to %out.
; In the gfx802 expectations the lane-select register is first copied into m0
; ("s_mov_b32 m0, s3") and the write is "v_writelane_b32 v2, s2, m0". In the
; gfx1010 and gfx1100 expectations the SGPR is used directly
; ("v_writelane_b32 v0, s2, s3").
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them with that script rather than editing them by hand.
Austin Kerbow2c9a46c2025-05-11 21:18:11 -07001488define amdgpu_kernel void @test_writelane_sreg_oldval_i32(i32 %oldval, ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301489; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1490; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001491; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0
1492; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
Jun Wang31f39c82025-04-15 15:17:33 -07001493; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
1494; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
1495; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301496; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001497; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301498; GFX802-SDAG-NEXT: s_mov_b32 m0, s3
1499; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
1500; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, m0
1501; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
1502; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1503; GFX802-SDAG-NEXT: s_endpgm
1504;
1505; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1506; GFX1010-SDAG: ; %bb.0:
1507; GFX1010-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001508; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0
1509; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301510; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1511; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001512; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301513; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1514; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1515; GFX1010-SDAG-NEXT: s_endpgm
1516;
1517; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1518; GFX1100-SDAG: ; %bb.0:
1519; GFX1100-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001520; GFX1100-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x0
1521; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301522; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1523; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001524; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301525; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1526; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301527; GFX1100-SDAG-NEXT: s_endpgm
1528;
1529; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1530; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001531; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x0
1532; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
Jun Wang31f39c82025-04-15 15:17:33 -07001533; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
1534; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1535; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301536; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001537; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301538; GFX802-GISEL-NEXT: s_mov_b32 m0, s3
1539; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1540; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, m0
1541; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1542; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1543; GFX802-GISEL-NEXT: s_endpgm
1544;
1545; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1546; GFX1010-GISEL: ; %bb.0:
1547; GFX1010-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001548; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x0
1549; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301550; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1551; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001552; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301553; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1554; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1555; GFX1010-GISEL-NEXT: s_endpgm
1556;
1557; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1558; GFX1100-GISEL: ; %bb.0:
1559; GFX1100-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001560; GFX1100-GISEL-NEXT: s_load_b32 s6, s[4:5], 0x0
1561; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301562; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1563; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001564; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301565; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1566; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301567; GFX1100-GISEL-NEXT: s_endpgm
; IR under test: writelane fed entirely from kernel arguments, result stored to %out.
1568 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval)
Matt Arsenaultad386a82022-11-28 14:13:14 -05001569 store i32 %writelane, ptr addrspace(1) %out, align 4
Tim Renouf2a99fa22018-02-28 19:10:32 +00001570 ret void
1571}
1572
; Purpose: i64 writelane where the old value, the written value and the lane
; select are all kernel arguments (%oldval, %src0, %src1); nothing is loaded
; from %out before the intrinsic call, and the result is stored to %out.
; Every expected sequence below contains two v_writelane_b32 instructions, one
; per 32-bit register of the pair. In the gfx802 expectations the lane-select
; register is first copied into m0 ("s_mov_b32 m0, s6") and both writes use m0.
; In the gfx1010 expectations both writes use s6 directly, and in the gfx1100
; expectations both use s4 directly.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them with that script rather than editing them by hand.
Austin Kerbow2c9a46c2025-05-11 21:18:11 -07001573define amdgpu_kernel void @test_writelane_sreg_oldval_i64(i64 %oldval, ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301574; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1575; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001576; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1577; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
1578; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
Jun Wang31f39c82025-04-15 15:17:33 -07001579; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
1580; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301581; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1582; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
Shilei Tian6548b632024-11-08 20:21:16 -05001583; GFX802-SDAG-NEXT: s_mov_b32 m0, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301584; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
Jun Wang31f39c82025-04-15 15:17:33 -07001585; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301586; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
1587; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
1588; GFX802-SDAG-NEXT: v_writelane_b32 v3, s5, m0
1589; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0
1590; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1591; GFX802-SDAG-NEXT: s_endpgm
1592;
1593; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1594; GFX1010-SDAG: ; %bb.0:
1595; GFX1010-SDAG-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001596; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1597; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1598; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301599; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1600; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1601; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s1
1602; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s0
Shilei Tian6548b632024-11-08 20:21:16 -05001603; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s5, s6
1604; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s4, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301605; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1606; GFX1010-SDAG-NEXT: s_endpgm
1607;
1608; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1609; GFX1100-SDAG: ; %bb.0:
1610; GFX1100-SDAG-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001611; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1612; GFX1100-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1613; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301614; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1615; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001616; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s1
1617; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s0
1618; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s7, s4
1619; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s6, s4
1620; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[2:3]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301621; GFX1100-SDAG-NEXT: s_endpgm
1622;
1623; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1624; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001625; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1626; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
1627; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
Jun Wang31f39c82025-04-15 15:17:33 -07001628; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
1629; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301630; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1631; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1632; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
Shilei Tian6548b632024-11-08 20:21:16 -05001633; GFX802-GISEL-NEXT: s_mov_b32 m0, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301634; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2
Jun Wang31f39c82025-04-15 15:17:33 -07001635; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301636; GFX802-GISEL-NEXT: v_writelane_b32 v0, s4, m0
1637; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0
1638; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s3
1639; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1640; GFX802-GISEL-NEXT: s_endpgm
1641;
1642; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1643; GFX1010-GISEL: ; %bb.0:
1644; GFX1010-GISEL-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001645; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1646; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1647; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301648; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1649; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1650; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s0
1651; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s1
Shilei Tian6548b632024-11-08 20:21:16 -05001652; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s4, s6
1653; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s5, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301654; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1655; GFX1010-GISEL-NEXT: s_endpgm
1656;
1657; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1658; GFX1100-GISEL: ; %bb.0:
1659; GFX1100-GISEL-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001660; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1661; GFX1100-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1662; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301663; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1664; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001665; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s0
1666; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s1
1667; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s6, s4
1668; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s7, s4
1669; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[2:3]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301670; GFX1100-GISEL-NEXT: s_endpgm
; IR under test: i64 writelane fed entirely from kernel arguments, result stored to %out.
1671 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval)
1672 store i64 %writelane, ptr addrspace(1) %out, align 4
1673 ret void
1674}
1675
; writelane on a double whose old value comes from a kernel argument: the call
; passes (%src0, %src1, %oldval) and the result is stored to %out.
; Per the checks below, the 64-bit operation is emitted as two v_writelane_b32
; instructions (one per 32-bit half) on VGPRs that are first initialised from
; the SGPRs holding %oldval. On gfx802 the lane select is copied into m0 first;
; on gfx1010 and gfx1100 it is read directly from an SGPR.
Austin Kerbow2c9a46c2025-05-11 21:18:11 -07001676define amdgpu_kernel void @test_writelane_sreg_oldval_f64(double %oldval, ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301677; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1678; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001679; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1680; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
1681; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
Jun Wang31f39c82025-04-15 15:17:33 -07001682; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
1683; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301684; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1685; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
Shilei Tian6548b632024-11-08 20:21:16 -05001686; GFX802-SDAG-NEXT: s_mov_b32 m0, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301687; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
Jun Wang31f39c82025-04-15 15:17:33 -07001688; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301689; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2
1690; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3
1691; GFX802-SDAG-NEXT: v_writelane_b32 v3, s5, m0
1692; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0
1693; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1694; GFX802-SDAG-NEXT: s_endpgm
1695;
1696; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1697; GFX1010-SDAG: ; %bb.0:
1698; GFX1010-SDAG-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001699; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1700; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1701; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301702; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1703; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1704; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s1
1705; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s0
Shilei Tian6548b632024-11-08 20:21:16 -05001706; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s5, s6
1707; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s4, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301708; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1709; GFX1010-SDAG-NEXT: s_endpgm
1710;
1711; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1712; GFX1100-SDAG: ; %bb.0:
1713; GFX1100-SDAG-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001714; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1715; GFX1100-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1716; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301717; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1718; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001719; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s1
1720; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s0
1721; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s7, s4
1722; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s6, s4
1723; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[2:3]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301724; GFX1100-SDAG-NEXT: s_endpgm
1725;
1726; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1727; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001728; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1729; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
1730; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
Jun Wang31f39c82025-04-15 15:17:33 -07001731; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
1732; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301733; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1734; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1735; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
Shilei Tian6548b632024-11-08 20:21:16 -05001736; GFX802-GISEL-NEXT: s_mov_b32 m0, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301737; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2
Jun Wang31f39c82025-04-15 15:17:33 -07001738; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301739; GFX802-GISEL-NEXT: v_writelane_b32 v0, s4, m0
1740; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0
1741; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s3
1742; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1743; GFX802-GISEL-NEXT: s_endpgm
1744;
1745; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1746; GFX1010-GISEL: ; %bb.0:
1747; GFX1010-GISEL-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001748; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1749; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
1750; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301751; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1752; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1753; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s0
1754; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s1
Shilei Tian6548b632024-11-08 20:21:16 -05001755; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s4, s6
1756; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s5, s6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301757; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1758; GFX1010-GISEL-NEXT: s_endpgm
1759;
1760; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1761; GFX1100-GISEL: ; %bb.0:
1762; GFX1100-GISEL-NEXT: s_clause 0x2
Shilei Tian6548b632024-11-08 20:21:16 -05001763; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1764; GFX1100-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
1765; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x18
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301766; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1767; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001768; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s0
1769; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s1
1770; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s6, s4
1771; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s7, s4
1772; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[2:3]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301773; GFX1100-GISEL-NEXT: s_endpgm
1774 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval)
1775 store double %writelane, ptr addrspace(1) %out, align 4
1776 ret void
1777}
1778
; writelane on i32 where the old value is the constant 42 rather than a
; register operand; the result is stored to %out.
; Per the checks below, the constant is materialised into the destination VGPR
; with v_mov_b32 and a single v_writelane_b32 follows. On gfx802 the lane
; select is copied into m0 first; gfx1010 and gfx1100 use the SGPR directly.
1779define amdgpu_kernel void @test_writelane_imm_oldval_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
1780; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i32:
1781; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001782; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001783; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301784; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, 42
Jun Wang31f39c82025-04-15 15:17:33 -07001785; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
1786; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301787; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1788; GFX802-SDAG-NEXT: s_mov_b32 m0, s3
1789; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0
1790; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, m0
1791; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1
1792; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2
1793; GFX802-SDAG-NEXT: s_endpgm
1794;
1795; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i32:
1796; GFX1010-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001797; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301798; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 42
1799; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1800; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1801; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1802; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1]
1803; GFX1010-SDAG-NEXT: s_endpgm
1804;
1805; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i32:
1806; GFX1100-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001807; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301808; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 42
1809; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1810; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1811; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s3
1812; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301813; GFX1100-SDAG-NEXT: s_endpgm
1814;
1815; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i32:
1816; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001817; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001818; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301819; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, 42
Jun Wang31f39c82025-04-15 15:17:33 -07001820; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1821; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301822; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1823; GFX802-GISEL-NEXT: s_mov_b32 m0, s3
1824; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0
1825; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, m0
1826; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1
1827; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2
1828; GFX802-GISEL-NEXT: s_endpgm
1829;
1830; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i32:
1831; GFX1010-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001832; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301833; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 42
1834; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1835; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1836; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1837; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
1838; GFX1010-GISEL-NEXT: s_endpgm
1839;
1840; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i32:
1841; GFX1100-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001842; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301843; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 42
1844; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1845; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1846; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s3
1847; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301848; GFX1100-GISEL-NEXT: s_endpgm
1849 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 42)
Matt Arsenaultad386a82022-11-28 14:13:14 -05001850 store i32 %writelane, ptr addrspace(1) %out, align 4
Tim Renouf2a99fa22018-02-28 19:10:32 +00001851 ret void
1852}
1853
; writelane on i64 where the old value is the constant 42; the result is
; stored to %out.
; Per the checks below, the constant is split into two 32-bit VGPR
; initialisers (42 for the low half, 0 for the high half) and the operation is
; emitted as two v_writelane_b32 instructions sharing one lane select. On
; gfx802 the lane select is copied into m0 first.
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301854define amdgpu_kernel void @test_writelane_imm_oldval_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
1855; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i64:
1856; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001857; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1858; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
Jun Wang31f39c82025-04-15 15:17:33 -07001859; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301860; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, 0
1861; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, 42
Jun Wang31f39c82025-04-15 15:17:33 -07001862; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301863; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301864; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Jun Wang31f39c82025-04-15 15:17:33 -07001865; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
1866; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301867; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1868; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0
1869; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0
1870; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1871; GFX802-SDAG-NEXT: s_endpgm
1872;
1873; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i64:
1874; GFX1010-SDAG: ; %bb.0:
1875; GFX1010-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001876; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1877; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301878; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0
1879; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 42
1880; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1881; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001882; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1883; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301884; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1885; GFX1010-SDAG-NEXT: s_endpgm
1886;
1887; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i64:
1888; GFX1100-SDAG: ; %bb.0:
1889; GFX1100-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001890; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1891; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301892; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0
1893; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 42
1894; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1895; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001896; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1897; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s4
1898; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301899; GFX1100-SDAG-NEXT: s_endpgm
1900;
1901; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i64:
1902; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001903; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
1904; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07001905; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301906; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, 42
1907; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, 0
1908; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001909; GFX802-GISEL-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301910; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
Jun Wang31f39c82025-04-15 15:17:33 -07001911; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
1912; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301913; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0
1914; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0
1915; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
1916; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1917; GFX802-GISEL-NEXT: s_endpgm
1918;
1919; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i64:
1920; GFX1010-GISEL: ; %bb.0:
1921; GFX1010-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001922; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1923; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301924; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 42
1925; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0
1926; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
1927; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001928; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s4
1929; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301930; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1931; GFX1010-GISEL-NEXT: s_endpgm
1932;
1933; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i64:
1934; GFX1100-GISEL: ; %bb.0:
1935; GFX1100-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001936; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1937; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301938; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 42
1939; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0
1940; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
1941; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001942; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s4
1943; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s4
1944; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301945; GFX1100-GISEL-NEXT: s_endpgm
1946 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 42)
1947 store i64 %writelane, ptr addrspace(1) %out, align 4
1948 ret void
1949}
1950
; writelane on double where the old value is the constant 42.0; the result is
; stored to %out.
; Per the checks below, the constant is split into two 32-bit VGPR
; initialisers (0 for the low half, 0x40450000 for the high half) and the
; operation is emitted as two v_writelane_b32 instructions sharing one lane
; select. On gfx802 the lane select is copied into m0 first.
1951define amdgpu_kernel void @test_writelane_imm_oldval_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
1952; GFX802-SDAG-LABEL: test_writelane_imm_oldval_f64:
1953; GFX802-SDAG: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05001954; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1955; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
Jun Wang31f39c82025-04-15 15:17:33 -07001956; GFX802-SDAG-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301957; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000
1958; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, 0
Jun Wang31f39c82025-04-15 15:17:33 -07001959; GFX802-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301960; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301961; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Jun Wang31f39c82025-04-15 15:17:33 -07001962; GFX802-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
1963; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301964; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1
1965; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0
1966; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0
1967; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1968; GFX802-SDAG-NEXT: s_endpgm
1969;
1970; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_f64:
1971; GFX1010-SDAG: ; %bb.0:
1972; GFX1010-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001973; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
1974; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301975; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000
1976; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 0
1977; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0
1978; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04001979; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1980; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301981; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1982; GFX1010-SDAG-NEXT: s_endpgm
1983;
1984; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_f64:
1985; GFX1100-SDAG: ; %bb.0:
1986; GFX1100-SDAG-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05001987; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
1988; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301989; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000
1990; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0
1991; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
1992; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05001993; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s4
1994; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s4
1995; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05301996; GFX1100-SDAG-NEXT: s_endpgm
1997;
1998; GFX802-GISEL-LABEL: test_writelane_imm_oldval_f64:
1999; GFX802-GISEL: ; %bb.0:
Shilei Tian6548b632024-11-08 20:21:16 -05002000; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
2001; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
Jun Wang31f39c82025-04-15 15:17:33 -07002002; GFX802-GISEL-NEXT: s_add_i32 s12, s12, s17
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302003; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, 0
2004; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000
2005; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04002006; GFX802-GISEL-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302007; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1
Jun Wang31f39c82025-04-15 15:17:33 -07002008; GFX802-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
2009; GFX802-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302010; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0
2011; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0
2012; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0
2013; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2014; GFX802-GISEL-NEXT: s_endpgm
2015;
2016; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_f64:
2017; GFX1010-GISEL: ; %bb.0:
2018; GFX1010-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05002019; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
2020; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302021; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0
2022; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000
2023; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0
2024; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Matt Arsenaultb1bcb7c2024-07-15 09:59:07 +04002025; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s4
2026; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302027; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
2028; GFX1010-GISEL-NEXT: s_endpgm
2029;
2030; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_f64:
2031; GFX1100-GISEL: ; %bb.0:
2032; GFX1100-GISEL-NEXT: s_clause 0x1
Shilei Tian6548b632024-11-08 20:21:16 -05002033; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
2034; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x10
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302035; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0
2036; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000
2037; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
2038; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
Shilei Tian6548b632024-11-08 20:21:16 -05002039; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s4
2040; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s4
2041; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302042; GFX1100-GISEL-NEXT: s_endpgm
2043 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double 42.0)
2044 store double %writelane, ptr addrspace(1) %out, align 4
2045 ret void
2046}
2047
; writelane on half in a non-kernel function (no amdgpu_kernel; the checks end
; in s_setpc_b64 rather than s_endpgm). The old value is loaded from %out and
; the result is stored back to the same pointer.
; Per the checks below, %src and %src1 arrive in VGPRs (v2, v3) and are moved
; to SGPRs with v_readfirstlane_b32 before a single v_writelane_b32; the memory
; accesses are 16-bit (ushort/u16 load, short/b16 store). On gfx802 the lane
; select is additionally copied into m0.
2048define void @test_writelane_half(ptr addrspace(1) %out, half %src, i32 %src1) {
2049; GFX802-SDAG-LABEL: test_writelane_half:
2050; GFX802-SDAG: ; %bb.0:
2051; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002053; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2054; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2055; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302056; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002057; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302058; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4
2059; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2060; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2061;
2062; GFX1010-SDAG-LABEL: test_writelane_half:
2063; GFX1010-SDAG: ; %bb.0:
2064; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off
2066; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2067; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2068; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2069; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2070; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off
2071; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2072;
2073; GFX1100-SDAG-LABEL: test_writelane_half:
2074; GFX1100-SDAG: ; %bb.0:
2075; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2076; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
2077; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2078; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2079; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302080; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2081; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
2082; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2083;
2084; GFX802-GISEL-LABEL: test_writelane_half:
2085; GFX802-GISEL: ; %bb.0:
2086; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2087; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1]
2088; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2089; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2090; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2091; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2092; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2093; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4
2094; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2095; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2096;
2097; GFX1010-GISEL-LABEL: test_writelane_half:
2098; GFX1010-GISEL: ; %bb.0:
2099; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2100; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off
2101; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2102; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2103; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2104; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2105; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off
2106; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2107;
2108; GFX1100-GISEL-LABEL: test_writelane_half:
2109; GFX1100-GISEL: ; %bb.0:
2110; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2111; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
2112; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2113; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2114; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302115; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2116; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
2117; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2118 %oldval = load half, ptr addrspace(1) %out
2119 %writelane = call half @llvm.amdgcn.writelane.f16(half %src, i32 %src1, half %oldval)
2120 store half %writelane, ptr addrspace(1) %out, align 4
2121 ret void
2122}
2123
; writelane on float in a non-kernel function (no amdgpu_kernel; the checks end
; in s_setpc_b64 rather than s_endpgm). The old value is loaded from %out and
; the result is stored back to the same pointer.
; Per the checks below, %src and %src1 arrive in VGPRs (v2, v3) and are moved
; to SGPRs with v_readfirstlane_b32 before a single v_writelane_b32; the memory
; accesses are 32-bit (dword/b32 load and store). On gfx802 the lane select is
; additionally copied into m0.
2124define void @test_writelane_float(ptr addrspace(1) %out, float %src, i32 %src1) {
2125; GFX802-SDAG-LABEL: test_writelane_float:
2126; GFX802-SDAG: ; %bb.0:
2127; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2128; GFX802-SDAG-NEXT: flat_load_dword v4, v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002129; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2130; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2131; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302132; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002133; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302134; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v4
2135; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2136; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2137;
2138; GFX1010-SDAG-LABEL: test_writelane_float:
2139; GFX1010-SDAG: ; %bb.0:
2140; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141; GFX1010-SDAG-NEXT: global_load_dword v4, v[0:1], off
2142; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2143; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2144; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2145; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2146; GFX1010-SDAG-NEXT: global_store_dword v[0:1], v4, off
2147; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2148;
2149; GFX1100-SDAG-LABEL: test_writelane_float:
2150; GFX1100-SDAG: ; %bb.0:
2151; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2152; GFX1100-SDAG-NEXT: global_load_b32 v4, v[0:1], off
2153; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2154; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2155; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302156; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2157; GFX1100-SDAG-NEXT: global_store_b32 v[0:1], v4, off
2158; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2159;
2160; GFX802-GISEL-LABEL: test_writelane_float:
2161; GFX802-GISEL: ; %bb.0:
2162; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2163; GFX802-GISEL-NEXT: flat_load_dword v4, v[0:1]
2164; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2165; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2166; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2167; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2168; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2169; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v4
2170; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2171; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2172;
2173; GFX1010-GISEL-LABEL: test_writelane_float:
2174; GFX1010-GISEL: ; %bb.0:
2175; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2176; GFX1010-GISEL-NEXT: global_load_dword v4, v[0:1], off
2177; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2178; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2179; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2180; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2181; GFX1010-GISEL-NEXT: global_store_dword v[0:1], v4, off
2182; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2183;
2184; GFX1100-GISEL-LABEL: test_writelane_float:
2185; GFX1100-GISEL: ; %bb.0:
2186; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187; GFX1100-GISEL-NEXT: global_load_b32 v4, v[0:1], off
2188; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2189; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2190; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302191; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2192; GFX1100-GISEL-NEXT: global_store_b32 v[0:1], v4, off
2193; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2194 %oldval = load float, ptr addrspace(1) %out
2195 %writelane = call float @llvm.amdgcn.writelane.f32(float %src, i32 %src1, float %oldval)
2196 store float %writelane, ptr addrspace(1) %out, align 4
2197 ret void
2198}
2199
; Writelane on a bfloat value: loads the current bfloat from %out, calls
; @llvm.amdgcn.writelane.bf16 with %src, %src1 and the loaded value, and
; stores the result back to %out.
; Every run line expects a 16-bit load, two v_readfirstlane_b32 (from v2 and
; v3), a single v_writelane_b32 into the loaded register v4, and a 16-bit
; store. On GFX802 the SGPR read from v3 is first copied to m0 and m0 is the
; last v_writelane_b32 operand; GFX1010 and GFX1100 pass that SGPR directly.
; NOTE(review): the store is marked align 4 while the load carries no explicit
; alignment and the checks show a 16-bit access - confirm this is intended.
2200define void @test_writelane_bfloat(ptr addrspace(1) %out, bfloat %src, i32 %src1) {
2201; GFX802-SDAG-LABEL: test_writelane_bfloat:
2202; GFX802-SDAG: ; %bb.0:
2203; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002205; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2206; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2207; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302208; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002209; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302210; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4
2211; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2212; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2213;
2214; GFX1010-SDAG-LABEL: test_writelane_bfloat:
2215; GFX1010-SDAG: ; %bb.0:
2216; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off
2218; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2219; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2220; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2221; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2222; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off
2223; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2224;
2225; GFX1100-SDAG-LABEL: test_writelane_bfloat:
2226; GFX1100-SDAG: ; %bb.0:
2227; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2228; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
2229; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2230; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2231; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302232; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2233; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
2234; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2235;
2236; GFX802-GISEL-LABEL: test_writelane_bfloat:
2237; GFX802-GISEL: ; %bb.0:
2238; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2239; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002240; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v3
2241; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v2
2242; GFX802-GISEL-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302243; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002244; GFX802-GISEL-NEXT: v_writelane_b32 v4, s5, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302245; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4
2246; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2247; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2248;
2249; GFX1010-GISEL-LABEL: test_writelane_bfloat:
2250; GFX1010-GISEL: ; %bb.0:
2251; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2252; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off
2253; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2254; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2255; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2256; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2257; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off
2258; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2259;
2260; GFX1100-GISEL-LABEL: test_writelane_bfloat:
2261; GFX1100-GISEL: ; %bb.0:
2262; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
2264; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2265; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2266; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302267; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2268; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
2269; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2270 %oldval = load bfloat, ptr addrspace(1) %out
2271 %writelane = call bfloat @llvm.amdgcn.writelane.bf16(bfloat %src, i32 %src1, bfloat %oldval)
2272 store bfloat %writelane, ptr addrspace(1) %out, align 4
2273 ret void
2274}
2275
; Writelane on an i16 value: loads the current i16 from %out, calls
; @llvm.amdgcn.writelane.i16 with %src, %src1 and the loaded value, and stores
; the result back to %out.
; Every run line expects a 16-bit load, two v_readfirstlane_b32 (from v2 and
; v3), a single v_writelane_b32 into the loaded register v4, and a 16-bit
; store. On GFX802 the SGPR read from v3 is copied to m0 before being used as
; the last v_writelane_b32 operand; GFX1010 and GFX1100 pass that SGPR directly.
; NOTE(review): the store is marked align 4 while the load carries no explicit
; alignment and the checks show a 16-bit access - confirm this is intended.
2276define void @test_writelane_i16(ptr addrspace(1) %out, i16 %src, i32 %src1) {
2277; GFX802-SDAG-LABEL: test_writelane_i16:
2278; GFX802-SDAG: ; %bb.0:
2279; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2280; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002281; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2282; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2283; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302284; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002285; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302286; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4
2287; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2288; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2289;
2290; GFX1010-SDAG-LABEL: test_writelane_i16:
2291; GFX1010-SDAG: ; %bb.0:
2292; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2293; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off
2294; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2295; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2296; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2297; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2298; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off
2299; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2300;
2301; GFX1100-SDAG-LABEL: test_writelane_i16:
2302; GFX1100-SDAG: ; %bb.0:
2303; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2304; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
2305; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2306; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2307; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302308; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2309; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
2310; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2311;
2312; GFX802-GISEL-LABEL: test_writelane_i16:
2313; GFX802-GISEL: ; %bb.0:
2314; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2315; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1]
2316; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2317; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2318; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2319; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2320; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2321; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4
2322; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2323; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2324;
2325; GFX1010-GISEL-LABEL: test_writelane_i16:
2326; GFX1010-GISEL: ; %bb.0:
2327; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2328; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off
2329; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2330; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2331; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2332; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2333; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off
2334; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2335;
2336; GFX1100-GISEL-LABEL: test_writelane_i16:
2337; GFX1100-GISEL: ; %bb.0:
2338; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2339; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
2340; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2341; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2342; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302343; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2344; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
2345; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2346 %oldval = load i16, ptr addrspace(1) %out
2347 %writelane = call i16 @llvm.amdgcn.writelane.i16(i16 %src, i32 %src1, i16 %oldval)
2348 store i16 %writelane, ptr addrspace(1) %out, align 4
2349 ret void
2350}
2351
; Writelane on a <2 x half> value: loads the current vector from %out, calls
; @llvm.amdgcn.writelane.v2f16 with %src, %src1 and the loaded value, and
; stores the result back to %out.
; Every run line expects one dword load, two v_readfirstlane_b32 (from v2 and
; v3), a single v_writelane_b32 into the loaded register v4, and one dword
; store, i.e. the two halves are handled as one 32-bit register. On GFX802 the
; SGPR read from v3 is copied to m0 before being used as the last
; v_writelane_b32 operand; GFX1010 and GFX1100 pass that SGPR directly.
2352define void @test_writelane_v2f16(ptr addrspace(1) %out, <2 x half> %src, i32 %src1) {
2353; GFX802-SDAG-LABEL: test_writelane_v2f16:
2354; GFX802-SDAG: ; %bb.0:
2355; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356; GFX802-SDAG-NEXT: flat_load_dword v4, v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002357; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2358; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
2359; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302360; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002361; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302362; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v4
2363; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2364; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2365;
2366; GFX1010-SDAG-LABEL: test_writelane_v2f16:
2367; GFX1010-SDAG: ; %bb.0:
2368; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2369; GFX1010-SDAG-NEXT: global_load_dword v4, v[0:1], off
2370; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2371; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2372; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2373; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
2374; GFX1010-SDAG-NEXT: global_store_dword v[0:1], v4, off
2375; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2376;
2377; GFX1100-SDAG-LABEL: test_writelane_v2f16:
2378; GFX1100-SDAG: ; %bb.0:
2379; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2380; GFX1100-SDAG-NEXT: global_load_b32 v4, v[0:1], off
2381; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
2382; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
2383; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302384; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
2385; GFX1100-SDAG-NEXT: global_store_b32 v[0:1], v4, off
2386; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2387;
2388; GFX802-GISEL-LABEL: test_writelane_v2f16:
2389; GFX802-GISEL: ; %bb.0:
2390; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2391; GFX802-GISEL-NEXT: flat_load_dword v4, v[0:1]
2392; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2393; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2394; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2395; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2396; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
2397; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v4
2398; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2399; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2400;
2401; GFX1010-GISEL-LABEL: test_writelane_v2f16:
2402; GFX1010-GISEL: ; %bb.0:
2403; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404; GFX1010-GISEL-NEXT: global_load_dword v4, v[0:1], off
2405; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2406; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
2407; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2408; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
2409; GFX1010-GISEL-NEXT: global_store_dword v[0:1], v4, off
2410; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2411;
2412; GFX1100-GISEL-LABEL: test_writelane_v2f16:
2413; GFX1100-GISEL: ; %bb.0:
2414; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2415; GFX1100-GISEL-NEXT: global_load_b32 v4, v[0:1], off
2416; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2417; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
2418; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302419; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
2420; GFX1100-GISEL-NEXT: global_store_b32 v[0:1], v4, off
2421; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2422 %oldval = load <2 x half>, ptr addrspace(1) %out
2423 %writelane = call <2 x half> @llvm.amdgcn.writelane.v2f16(<2 x half> %src, i32 %src1, <2 x half> %oldval)
2424 store <2 x half> %writelane, ptr addrspace(1) %out, align 4
2425 ret void
2426}
2427
; Writelane on a <2 x float> value: loads the current vector from %out, calls
; @llvm.amdgcn.writelane.v2f32 with %src, %src1 and the loaded value, and
; stores the result back to %out.
; Every run line expects one 64-bit load into v[5:6], three
; v_readfirstlane_b32 (from v2, v3 and v4), two v_writelane_b32 (one per
; loaded register, both using the SGPR read from v4 as the last operand), and
; one 64-bit store. On GFX802 that SGPR is copied to m0 first.
; NOTE(review): the function is named test_readlane_v2f32 but it exercises the
; writelane intrinsic; the name looks like a copy-paste leftover. Renaming it
; would also require regenerating the LABEL checks below.
2428define void @test_readlane_v2f32(ptr addrspace(1) %out, <2 x float> %src, i32 %src1) {
2429; GFX802-SDAG-LABEL: test_readlane_v2f32:
2430; GFX802-SDAG: ; %bb.0:
2431; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2432; GFX802-SDAG-NEXT: flat_load_dwordx2 v[5:6], v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002433; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v4
2434; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2435; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v2
2436; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302437; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002438; GFX802-SDAG-NEXT: v_writelane_b32 v6, s5, m0
2439; GFX802-SDAG-NEXT: v_writelane_b32 v5, s6, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302440; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[5:6]
2441; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2442; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2443;
2444; GFX1010-SDAG-LABEL: test_readlane_v2f32:
2445; GFX1010-SDAG: ; %bb.0:
2446; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2447; GFX1010-SDAG-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
2448; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v3
2449; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v4
2450; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v2
2451; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2452; GFX1010-SDAG-NEXT: v_writelane_b32 v6, s4, s5
2453; GFX1010-SDAG-NEXT: v_writelane_b32 v5, s6, s5
2454; GFX1010-SDAG-NEXT: global_store_dwordx2 v[0:1], v[5:6], off
2455; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2456;
2457; GFX1100-SDAG-LABEL: test_readlane_v2f32:
2458; GFX1100-SDAG: ; %bb.0:
2459; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2460; GFX1100-SDAG-NEXT: global_load_b64 v[5:6], v[0:1], off
2461; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v3
2462; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v4
2463; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v2
2464; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302465; GFX1100-SDAG-NEXT: v_writelane_b32 v6, s0, s1
2466; GFX1100-SDAG-NEXT: v_writelane_b32 v5, s2, s1
2467; GFX1100-SDAG-NEXT: global_store_b64 v[0:1], v[5:6], off
2468; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2469;
2470; GFX802-GISEL-LABEL: test_readlane_v2f32:
2471; GFX802-GISEL: ; %bb.0:
2472; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2473; GFX802-GISEL-NEXT: flat_load_dwordx2 v[5:6], v[0:1]
2474; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v4
2475; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2476; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2477; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2478; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2479; GFX802-GISEL-NEXT: v_writelane_b32 v5, s4, m0
2480; GFX802-GISEL-NEXT: v_writelane_b32 v6, s6, m0
2481; GFX802-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[5:6]
2482; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2483; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2484;
2485; GFX1010-GISEL-LABEL: test_readlane_v2f32:
2486; GFX1010-GISEL: ; %bb.0:
2487; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2488; GFX1010-GISEL-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
2489; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2490; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v4
2491; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2492; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2493; GFX1010-GISEL-NEXT: v_writelane_b32 v5, s4, s5
2494; GFX1010-GISEL-NEXT: v_writelane_b32 v6, s6, s5
2495; GFX1010-GISEL-NEXT: global_store_dwordx2 v[0:1], v[5:6], off
2496; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2497;
2498; GFX1100-GISEL-LABEL: test_readlane_v2f32:
2499; GFX1100-GISEL: ; %bb.0:
2500; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2501; GFX1100-GISEL-NEXT: global_load_b64 v[5:6], v[0:1], off
2502; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2503; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v4
2504; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
2505; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302506; GFX1100-GISEL-NEXT: v_writelane_b32 v5, s0, s1
2507; GFX1100-GISEL-NEXT: v_writelane_b32 v6, s2, s1
2508; GFX1100-GISEL-NEXT: global_store_b64 v[0:1], v[5:6], off
2509; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2510 %oldval = load <2 x float>, ptr addrspace(1) %out
2511 %writelane = call <2 x float> @llvm.amdgcn.writelane.v2f32(<2 x float> %src, i32 %src1, <2 x float> %oldval)
2512 store <2 x float> %writelane, ptr addrspace(1) %out, align 4
2513 ret void
2514}
2515
; Writelane on a <7 x i32> value: loads the current vector from %out, calls
; @llvm.amdgcn.writelane.v7i32 with %src, %src1 and the loaded value, and
; stores the result back to %out.
; Every run line expects eight v_readfirstlane_b32 (v2 through v9), seven
; v_writelane_b32 (one per 32-bit element, all sharing the SGPR read from v9
; as the last operand; on GFX802 that SGPR is copied to m0 first), and the
; result stored as a 4-dword piece plus a 3-dword piece at offset 16.
; The two loads differ between the selectors: the SelectionDAG runs load
; 4 + 3 dwords, while the GlobalISel runs load 4 + 4 dwords (v[14:17]) but
; still write and store only v[14:16] of the second piece.
; The writelanes are split around s_waitcnt vmcnt(1) / vmcnt(0) so that each
; group follows the wait for the load that produced its registers.
2516define void @test_writelane_v7i32(ptr addrspace(1) %out, <7 x i32> %src, i32 %src1) {
2517; GFX802-SDAG-LABEL: test_writelane_v7i32:
2518; GFX802-SDAG: ; %bb.0:
2519; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520; GFX802-SDAG-NEXT: v_add_u32_e32 v17, vcc, 16, v0
2521; GFX802-SDAG-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
2522; GFX802-SDAG-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
2523; GFX802-SDAG-NEXT: flat_load_dwordx3 v[14:16], v[17:18]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002524; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v9
2525; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v5
2526; GFX802-SDAG-NEXT: v_readfirstlane_b32 s9, v4
2527; GFX802-SDAG-NEXT: v_readfirstlane_b32 s10, v3
2528; GFX802-SDAG-NEXT: v_readfirstlane_b32 s11, v2
2529; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
2530; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v8
2531; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v7
2532; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v6
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302533; GFX802-SDAG-NEXT: s_waitcnt vmcnt(1)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002534; GFX802-SDAG-NEXT: v_writelane_b32 v13, s8, m0
2535; GFX802-SDAG-NEXT: v_writelane_b32 v12, s9, m0
2536; GFX802-SDAG-NEXT: v_writelane_b32 v11, s10, m0
2537; GFX802-SDAG-NEXT: v_writelane_b32 v10, s11, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302538; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002539; GFX802-SDAG-NEXT: v_writelane_b32 v16, s5, m0
2540; GFX802-SDAG-NEXT: v_writelane_b32 v15, s6, m0
2541; GFX802-SDAG-NEXT: v_writelane_b32 v14, s7, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302542; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[10:13]
2543; GFX802-SDAG-NEXT: flat_store_dwordx3 v[17:18], v[14:16]
2544; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2545; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2546;
2547; GFX1010-SDAG-LABEL: test_writelane_v7i32:
2548; GFX1010-SDAG: ; %bb.0:
2549; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2550; GFX1010-SDAG-NEXT: s_clause 0x1
2551; GFX1010-SDAG-NEXT: global_load_dwordx3 v[14:16], v[0:1], off offset:16
2552; GFX1010-SDAG-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
2553; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v9
2554; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v5
2555; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s9, v4
2556; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s10, v3
2557; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s11, v2
2558; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v8
2559; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v7
2560; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v6
2561; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(1)
2562; GFX1010-SDAG-NEXT: v_writelane_b32 v16, s4, s5
2563; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2564; GFX1010-SDAG-NEXT: v_writelane_b32 v13, s8, s5
2565; GFX1010-SDAG-NEXT: v_writelane_b32 v12, s9, s5
2566; GFX1010-SDAG-NEXT: v_writelane_b32 v11, s10, s5
2567; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s11, s5
2568; GFX1010-SDAG-NEXT: v_writelane_b32 v15, s6, s5
2569; GFX1010-SDAG-NEXT: v_writelane_b32 v14, s7, s5
2570; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
2571; GFX1010-SDAG-NEXT: global_store_dwordx3 v[0:1], v[14:16], off offset:16
2572; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2573;
2574; GFX1100-SDAG-LABEL: test_writelane_v7i32:
2575; GFX1100-SDAG: ; %bb.0:
2576; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2577; GFX1100-SDAG-NEXT: s_clause 0x1
2578; GFX1100-SDAG-NEXT: global_load_b96 v[14:16], v[0:1], off offset:16
2579; GFX1100-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off
2580; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v9
2581; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v5
2582; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s5, v4
2583; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s6, v3
2584; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s7, v2
2585; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v8
2586; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v7
2587; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v6
2588; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(1)
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302589; GFX1100-SDAG-NEXT: v_writelane_b32 v16, s0, s1
2590; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2591; GFX1100-SDAG-NEXT: v_writelane_b32 v13, s4, s1
2592; GFX1100-SDAG-NEXT: v_writelane_b32 v12, s5, s1
2593; GFX1100-SDAG-NEXT: v_writelane_b32 v11, s6, s1
2594; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s7, s1
2595; GFX1100-SDAG-NEXT: v_writelane_b32 v15, s2, s1
2596; GFX1100-SDAG-NEXT: v_writelane_b32 v14, s3, s1
2597; GFX1100-SDAG-NEXT: s_clause 0x1
2598; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[10:13], off
2599; GFX1100-SDAG-NEXT: global_store_b96 v[0:1], v[14:16], off offset:16
2600; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2601;
2602; GFX802-GISEL-LABEL: test_writelane_v7i32:
2603; GFX802-GISEL: ; %bb.0:
2604; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2605; GFX802-GISEL-NEXT: v_add_u32_e32 v18, vcc, 16, v0
2606; GFX802-GISEL-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
2607; GFX802-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
2608; GFX802-GISEL-NEXT: flat_load_dwordx4 v[14:17], v[18:19]
2609; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v9
2610; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2611; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2612; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2613; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2614; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2615; GFX802-GISEL-NEXT: v_readfirstlane_b32 s9, v6
2616; GFX802-GISEL-NEXT: v_readfirstlane_b32 s10, v7
2617; GFX802-GISEL-NEXT: v_readfirstlane_b32 s11, v8
2618; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
2619; GFX802-GISEL-NEXT: v_writelane_b32 v10, s4, m0
2620; GFX802-GISEL-NEXT: v_writelane_b32 v11, s6, m0
2621; GFX802-GISEL-NEXT: v_writelane_b32 v12, s7, m0
2622; GFX802-GISEL-NEXT: v_writelane_b32 v13, s8, m0
2623; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2624; GFX802-GISEL-NEXT: v_writelane_b32 v14, s9, m0
2625; GFX802-GISEL-NEXT: v_writelane_b32 v15, s10, m0
2626; GFX802-GISEL-NEXT: v_writelane_b32 v16, s11, m0
2627; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[10:13]
2628; GFX802-GISEL-NEXT: flat_store_dwordx3 v[18:19], v[14:16]
2629; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2630; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2631;
2632; GFX1010-GISEL-LABEL: test_writelane_v7i32:
2633; GFX1010-GISEL: ; %bb.0:
2634; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2635; GFX1010-GISEL-NEXT: s_clause 0x1
2636; GFX1010-GISEL-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
2637; GFX1010-GISEL-NEXT: global_load_dwordx4 v[14:17], v[0:1], off offset:16
2638; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2639; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v9
2640; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2641; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2642; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2643; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v6
2644; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v7
2645; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s11, v8
2646; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(1)
2647; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s4, s5
2648; GFX1010-GISEL-NEXT: v_writelane_b32 v11, s6, s5
2649; GFX1010-GISEL-NEXT: v_writelane_b32 v12, s7, s5
2650; GFX1010-GISEL-NEXT: v_writelane_b32 v13, s8, s5
2651; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2652; GFX1010-GISEL-NEXT: v_writelane_b32 v14, s9, s5
2653; GFX1010-GISEL-NEXT: v_writelane_b32 v15, s10, s5
2654; GFX1010-GISEL-NEXT: v_writelane_b32 v16, s11, s5
2655; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
2656; GFX1010-GISEL-NEXT: global_store_dwordx3 v[0:1], v[14:16], off offset:16
2657; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2658;
2659; GFX1100-GISEL-LABEL: test_writelane_v7i32:
2660; GFX1100-GISEL: ; %bb.0:
2661; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2662; GFX1100-GISEL-NEXT: s_clause 0x1
2663; GFX1100-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
2664; GFX1100-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
2665; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2666; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v9
2667; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
2668; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
2669; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
2670; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2671; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v7
2672; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s7, v8
2673; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(1)
2674; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s0, s1
2675; GFX1100-GISEL-NEXT: v_writelane_b32 v11, s2, s1
2676; GFX1100-GISEL-NEXT: v_writelane_b32 v12, s3, s1
2677; GFX1100-GISEL-NEXT: v_writelane_b32 v13, s4, s1
2678; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2679; GFX1100-GISEL-NEXT: v_writelane_b32 v14, s5, s1
2680; GFX1100-GISEL-NEXT: v_writelane_b32 v15, s6, s1
2681; GFX1100-GISEL-NEXT: v_writelane_b32 v16, s7, s1
2682; GFX1100-GISEL-NEXT: s_clause 0x1
2683; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[10:13], off
2684; GFX1100-GISEL-NEXT: global_store_b96 v[0:1], v[14:16], off offset:16
2685; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2686 %oldval = load <7 x i32>, ptr addrspace(1) %out
2687 %writelane = call <7 x i32> @llvm.amdgcn.writelane.v7i32(<7 x i32> %src, i32 %src1, <7 x i32> %oldval)
2688 store <7 x i32> %writelane, ptr addrspace(1) %out, align 4
2689 ret void
2690}
2691
; Writelane on an <8 x i16> value: loads the current vector from %out, calls
; @llvm.amdgcn.writelane.v8i16 with %src, %src1 and the loaded value, and
; stores the result back to %out.
; Every run line expects one 4-dword load into v[7:10], five
; v_readfirstlane_b32 (v2 through v6), four v_writelane_b32 (one per loaded
; 32-bit register, i.e. the i16 elements are handled in packed pairs), and one
; 4-dword store. All four writelanes share the SGPR read from v6 as the last
; operand; on GFX802 that SGPR is copied to m0 first.
2692define void @test_writelane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src1) {
2693; GFX802-SDAG-LABEL: test_writelane_v8i16:
2694; GFX802-SDAG: ; %bb.0:
2695; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2696; GFX802-SDAG-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
Pierre van Houtryve52317362025-02-26 13:14:03 +01002697; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v6
2698; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v5
2699; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v4
2700; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v3
2701; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v2
2702; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302703; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
Pierre van Houtryve52317362025-02-26 13:14:03 +01002704; GFX802-SDAG-NEXT: v_writelane_b32 v10, s5, m0
2705; GFX802-SDAG-NEXT: v_writelane_b32 v9, s6, m0
2706; GFX802-SDAG-NEXT: v_writelane_b32 v8, s7, m0
2707; GFX802-SDAG-NEXT: v_writelane_b32 v7, s8, m0
Vikram Hegde5feb32b2024-06-25 14:35:19 +05302708; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
2709; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2710; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2711;
2712; GFX1010-SDAG-LABEL: test_writelane_v8i16:
2713; GFX1010-SDAG: ; %bb.0:
2714; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2715; GFX1010-SDAG-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
2716; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v5
2717; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v6
2718; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v4
2719; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v3
2720; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v2
2721; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2722; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s4, s5
2723; GFX1010-SDAG-NEXT: v_writelane_b32 v9, s6, s5
2724; GFX1010-SDAG-NEXT: v_writelane_b32 v8, s7, s5
2725; GFX1010-SDAG-NEXT: v_writelane_b32 v7, s8, s5
2726; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
2727; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2728;
2729; GFX1100-SDAG-LABEL: test_writelane_v8i16:
2730; GFX1100-SDAG: ; %bb.0:
2731; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2732; GFX1100-SDAG-NEXT: global_load_b128 v[7:10], v[0:1], off
2733; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v5
2734; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v6
2735; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v4
2736; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v3
2737; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2738; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2739; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s0, s1
2740; GFX1100-SDAG-NEXT: v_writelane_b32 v9, s2, s1
2741; GFX1100-SDAG-NEXT: v_writelane_b32 v8, s3, s1
2742; GFX1100-SDAG-NEXT: v_writelane_b32 v7, s4, s1
2743; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[7:10], off
2744; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2745;
2746; GFX802-GISEL-LABEL: test_writelane_v8i16:
2747; GFX802-GISEL: ; %bb.0:
2748; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2749; GFX802-GISEL-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
2750; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2751; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2752; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2753; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2754; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2755; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2756; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2757; GFX802-GISEL-NEXT: v_writelane_b32 v7, s4, m0
2758; GFX802-GISEL-NEXT: v_writelane_b32 v8, s6, m0
2759; GFX802-GISEL-NEXT: v_writelane_b32 v9, s7, m0
2760; GFX802-GISEL-NEXT: v_writelane_b32 v10, s8, m0
2761; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
2762; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2763; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2764;
2765; GFX1010-GISEL-LABEL: test_writelane_v8i16:
2766; GFX1010-GISEL: ; %bb.0:
2767; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2768; GFX1010-GISEL-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
2769; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2770; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2771; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2772; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2773; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2774; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2775; GFX1010-GISEL-NEXT: v_writelane_b32 v7, s4, s5
2776; GFX1010-GISEL-NEXT: v_writelane_b32 v8, s6, s5
2777; GFX1010-GISEL-NEXT: v_writelane_b32 v9, s7, s5
2778; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s8, s5
2779; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
2780; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2781;
2782; GFX1100-GISEL-LABEL: test_writelane_v8i16:
2783; GFX1100-GISEL: ; %bb.0:
2784; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2785; GFX1100-GISEL-NEXT: global_load_b128 v[7:10], v[0:1], off
2786; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2787; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v6
2788; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
2789; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
2790; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
2791; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2792; GFX1100-GISEL-NEXT: v_writelane_b32 v7, s0, s1
2793; GFX1100-GISEL-NEXT: v_writelane_b32 v8, s2, s1
2794; GFX1100-GISEL-NEXT: v_writelane_b32 v9, s3, s1
2795; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s4, s1
2796; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[7:10], off
2797; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2798 %oldval = load <8 x i16>, ptr addrspace(1) %out
2799 %writelane = call <8 x i16> @llvm.amdgcn.writelane.v8i16(<8 x i16> %src, i32 %src1, <8 x i16> %oldval)
2800 store <8 x i16> %writelane, ptr addrspace(1) %out, align 4
2801 ret void
2802}
2803
Vikram Hegde123b0e22025-04-16 11:28:28 +05302804define void @test_writelane_v2i64(ptr addrspace(1) %out, <2 x i64> %src, i32 %src1) {
2805; GFX802-SDAG-LABEL: test_writelane_v2i64:
2806; GFX802-SDAG: ; %bb.0:
2807; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2808; GFX802-SDAG-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
2809; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v6
2810; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v5
2811; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v4
2812; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v3
2813; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v2
2814; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
2815; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2816; GFX802-SDAG-NEXT: v_writelane_b32 v10, s5, m0
2817; GFX802-SDAG-NEXT: v_writelane_b32 v9, s6, m0
2818; GFX802-SDAG-NEXT: v_writelane_b32 v8, s7, m0
2819; GFX802-SDAG-NEXT: v_writelane_b32 v7, s8, m0
2820; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
2821; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2822; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2823;
2824; GFX1010-SDAG-LABEL: test_writelane_v2i64:
2825; GFX1010-SDAG: ; %bb.0:
2826; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2827; GFX1010-SDAG-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
2828; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v5
2829; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v6
2830; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v4
2831; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v3
2832; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v2
2833; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2834; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s4, s5
2835; GFX1010-SDAG-NEXT: v_writelane_b32 v9, s6, s5
2836; GFX1010-SDAG-NEXT: v_writelane_b32 v8, s7, s5
2837; GFX1010-SDAG-NEXT: v_writelane_b32 v7, s8, s5
2838; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
2839; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2840;
2841; GFX1100-SDAG-LABEL: test_writelane_v2i64:
2842; GFX1100-SDAG: ; %bb.0:
2843; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2844; GFX1100-SDAG-NEXT: global_load_b128 v[7:10], v[0:1], off
2845; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v5
2846; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v6
2847; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v4
2848; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v3
2849; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v2
2850; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2851; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s0, s1
2852; GFX1100-SDAG-NEXT: v_writelane_b32 v9, s2, s1
2853; GFX1100-SDAG-NEXT: v_writelane_b32 v8, s3, s1
2854; GFX1100-SDAG-NEXT: v_writelane_b32 v7, s4, s1
2855; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[7:10], off
2856; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2857;
2858; GFX802-GISEL-LABEL: test_writelane_v2i64:
2859; GFX802-GISEL: ; %bb.0:
2860; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2861; GFX802-GISEL-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
2862; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2863; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2864; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2865; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2866; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2867; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
2868; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2869; GFX802-GISEL-NEXT: v_writelane_b32 v7, s4, m0
2870; GFX802-GISEL-NEXT: v_writelane_b32 v8, s6, m0
2871; GFX802-GISEL-NEXT: v_writelane_b32 v9, s7, m0
2872; GFX802-GISEL-NEXT: v_writelane_b32 v10, s8, m0
2873; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
2874; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
2875; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
2876;
2877; GFX1010-GISEL-LABEL: test_writelane_v2i64:
2878; GFX1010-GISEL: ; %bb.0:
2879; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2880; GFX1010-GISEL-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
2881; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
2882; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v6
2883; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
2884; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
2885; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
2886; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
2887; GFX1010-GISEL-NEXT: v_writelane_b32 v7, s4, s5
2888; GFX1010-GISEL-NEXT: v_writelane_b32 v8, s6, s5
2889; GFX1010-GISEL-NEXT: v_writelane_b32 v9, s7, s5
2890; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s8, s5
2891; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
2892; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
2893;
2894; GFX1100-GISEL-LABEL: test_writelane_v2i64:
2895; GFX1100-GISEL: ; %bb.0:
2896; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2897; GFX1100-GISEL-NEXT: global_load_b128 v[7:10], v[0:1], off
2898; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
2899; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v6
2900; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
2901; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
2902; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
2903; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
2904; GFX1100-GISEL-NEXT: v_writelane_b32 v7, s0, s1
2905; GFX1100-GISEL-NEXT: v_writelane_b32 v8, s2, s1
2906; GFX1100-GISEL-NEXT: v_writelane_b32 v9, s3, s1
2907; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s4, s1
2908; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[7:10], off
2909; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
2910 %oldval = load <2 x i64>, ptr addrspace(1) %out
2911 %writelane = call <2 x i64> @llvm.amdgcn.writelane.v2i64(<2 x i64> %src, i32 %src1, <2 x i64> %oldval)
2912 store <2 x i64> %writelane, ptr addrspace(1) %out, align 4
2913 ret void
2914}
2915
; Exercises llvm.amdgcn.writelane on a <3 x i64> value: loads the old value
; from %out, calls the intrinsic with %src, %src1 and the loaded value, and
; stores the result back to %out. Each run configuration's checks below expect
; six v_writelane_b32 instructions.
; The intrinsic is referenced with the .v3i64 overload suffix so that the
; mangled name agrees with the <3 x i64> operand and result type.
2916define void @test_writelane_v3i64(ptr addrspace(1) %out, <3 x i64> %src, i32 %src1) {
2917; GFX802-SDAG-LABEL: test_writelane_v3i64:
2918; GFX802-SDAG: ; %bb.0:
2919; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2920; GFX802-SDAG-NEXT: v_add_u32_e32 v13, vcc, 16, v0
2921; GFX802-SDAG-NEXT: flat_load_dwordx4 v[9:12], v[0:1]
2922; GFX802-SDAG-NEXT: v_addc_u32_e32 v14, vcc, 0, v1, vcc
2923; GFX802-SDAG-NEXT: flat_load_dwordx2 v[15:16], v[13:14]
2924; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v8
2925; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v5
2926; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v4
2927; GFX802-SDAG-NEXT: v_readfirstlane_b32 s9, v3
2928; GFX802-SDAG-NEXT: v_readfirstlane_b32 s10, v2
2929; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
2930; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v7
2931; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v6
2932; GFX802-SDAG-NEXT: s_waitcnt vmcnt(1)
2933; GFX802-SDAG-NEXT: v_writelane_b32 v12, s7, m0
2934; GFX802-SDAG-NEXT: v_writelane_b32 v11, s8, m0
2935; GFX802-SDAG-NEXT: v_writelane_b32 v10, s9, m0
2936; GFX802-SDAG-NEXT: v_writelane_b32 v9, s10, m0
2937; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2938; GFX802-SDAG-NEXT: v_writelane_b32 v16, s5, m0
2939; GFX802-SDAG-NEXT: v_writelane_b32 v15, s6, m0
2940; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[9:12]
2941; GFX802-SDAG-NEXT: flat_store_dwordx2 v[13:14], v[15:16]
2942; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
2943; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
2944;
2945; GFX1010-SDAG-LABEL: test_writelane_v3i64:
2946; GFX1010-SDAG: ; %bb.0:
2947; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2948; GFX1010-SDAG-NEXT: s_clause 0x1
2949; GFX1010-SDAG-NEXT: global_load_dwordx2 v[13:14], v[0:1], off offset:16
2950; GFX1010-SDAG-NEXT: global_load_dwordx4 v[9:12], v[0:1], off
2951; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v8
2952; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v5
2953; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v4
2954; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s9, v3
2955; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s10, v2
2956; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v7
2957; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v6
2958; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(1)
2959; GFX1010-SDAG-NEXT: v_writelane_b32 v14, s4, s5
2960; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
2961; GFX1010-SDAG-NEXT: v_writelane_b32 v12, s7, s5
2962; GFX1010-SDAG-NEXT: v_writelane_b32 v11, s8, s5
2963; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s9, s5
2964; GFX1010-SDAG-NEXT: v_writelane_b32 v9, s10, s5
2965; GFX1010-SDAG-NEXT: v_writelane_b32 v13, s6, s5
2966; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
2967; GFX1010-SDAG-NEXT: global_store_dwordx2 v[0:1], v[13:14], off offset:16
2968; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
2969;
2970; GFX1100-SDAG-LABEL: test_writelane_v3i64:
2971; GFX1100-SDAG: ; %bb.0:
2972; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2973; GFX1100-SDAG-NEXT: s_clause 0x1
2974; GFX1100-SDAG-NEXT: global_load_b64 v[13:14], v[0:1], off offset:16
2975; GFX1100-SDAG-NEXT: global_load_b128 v[9:12], v[0:1], off
2976; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v8
2977; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v5
2978; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v4
2979; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s5, v3
2980; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s6, v2
2981; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v7
2982; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v6
2983; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(1)
2984; GFX1100-SDAG-NEXT: v_writelane_b32 v14, s0, s1
2985; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
2986; GFX1100-SDAG-NEXT: v_writelane_b32 v12, s3, s1
2987; GFX1100-SDAG-NEXT: v_writelane_b32 v11, s4, s1
2988; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s5, s1
2989; GFX1100-SDAG-NEXT: v_writelane_b32 v9, s6, s1
2990; GFX1100-SDAG-NEXT: v_writelane_b32 v13, s2, s1
2991; GFX1100-SDAG-NEXT: s_clause 0x1
2992; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[9:12], off
2993; GFX1100-SDAG-NEXT: global_store_b64 v[0:1], v[13:14], off offset:16
2994; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
2995;
2996; GFX802-GISEL-LABEL: test_writelane_v3i64:
2997; GFX802-GISEL: ; %bb.0:
2998; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2999; GFX802-GISEL-NEXT: v_add_u32_e32 v17, vcc, 16, v0
3000; GFX802-GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
3001; GFX802-GISEL-NEXT: flat_load_dwordx4 v[9:12], v[0:1]
3002; GFX802-GISEL-NEXT: flat_load_dwordx4 v[13:16], v[17:18]
3003; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v8
3004; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
3005; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
3006; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
3007; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
3008; GFX802-GISEL-NEXT: v_readfirstlane_b32 s9, v6
3009; GFX802-GISEL-NEXT: v_readfirstlane_b32 s10, v7
3010; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
3011; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
3012; GFX802-GISEL-NEXT: v_writelane_b32 v9, s4, m0
3013; GFX802-GISEL-NEXT: v_writelane_b32 v10, s6, m0
3014; GFX802-GISEL-NEXT: v_writelane_b32 v11, s7, m0
3015; GFX802-GISEL-NEXT: v_writelane_b32 v12, s8, m0
3016; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
3017; GFX802-GISEL-NEXT: v_writelane_b32 v13, s9, m0
3018; GFX802-GISEL-NEXT: v_writelane_b32 v14, s10, m0
3019; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, v13
3020; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, v14
3021; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[9:12]
3022; GFX802-GISEL-NEXT: flat_store_dwordx2 v[17:18], v[2:3]
3023; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
3024; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
3025;
3026; GFX1010-GISEL-LABEL: test_writelane_v3i64:
3027; GFX1010-GISEL: ; %bb.0:
3028; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3029; GFX1010-GISEL-NEXT: s_clause 0x1
3030; GFX1010-GISEL-NEXT: global_load_dwordx4 v[9:12], v[0:1], off
3031; GFX1010-GISEL-NEXT: global_load_dwordx4 v[13:16], v[0:1], off offset:16
3032; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v8
3033; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v6
3034; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v7
3035; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
3036; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
3037; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v4
3038; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v5
3039; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(1)
3040; GFX1010-GISEL-NEXT: v_writelane_b32 v9, s4, s5
3041; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
3042; GFX1010-GISEL-NEXT: v_writelane_b32 v13, s7, s5
3043; GFX1010-GISEL-NEXT: v_writelane_b32 v14, s8, s5
3044; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s6, s5
3045; GFX1010-GISEL-NEXT: v_writelane_b32 v11, s9, s5
3046; GFX1010-GISEL-NEXT: v_writelane_b32 v12, s10, s5
3047; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, v13
3048; GFX1010-GISEL-NEXT: v_mov_b32_e32 v3, v14
3049; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
3050; GFX1010-GISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
3051; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
3052;
3053; GFX1100-GISEL-LABEL: test_writelane_v3i64:
3054; GFX1100-GISEL: ; %bb.0:
3055; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3056; GFX1100-GISEL-NEXT: s_clause 0x1
3057; GFX1100-GISEL-NEXT: global_load_b128 v[9:12], v[0:1], off
3058; GFX1100-GISEL-NEXT: global_load_b128 v[13:16], v[0:1], off offset:16
3059; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v8
3060; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v6
3061; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v7
3062; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
3063; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
3064; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v4
3065; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v5
3066; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(1)
3067; GFX1100-GISEL-NEXT: v_writelane_b32 v9, s0, s1
3068; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
3069; GFX1100-GISEL-NEXT: v_writelane_b32 v13, s3, s1
3070; GFX1100-GISEL-NEXT: v_writelane_b32 v14, s4, s1
3071; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s2, s1
3072; GFX1100-GISEL-NEXT: v_writelane_b32 v11, s5, s1
3073; GFX1100-GISEL-NEXT: v_writelane_b32 v12, s6, s1
3074; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, v13
3075; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, v14
3076; GFX1100-GISEL-NEXT: s_clause 0x1
3077; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[9:12], off
3078; GFX1100-GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off offset:16
3079; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3080 %oldval = load <3 x i64>, ptr addrspace(1) %out
3081 %writelane = call <3 x i64> @llvm.amdgcn.writelane.v3i64(<3 x i64> %src, i32 %src1, <3 x i64> %oldval)
3082 store <3 x i64> %writelane, ptr addrspace(1) %out, align 4
3083 ret void
3084}
3085
; Exercises llvm.amdgcn.writelane on a <4 x double> value: loads the old value
; from %out, calls llvm.amdgcn.writelane.v4f64 with %src, %src1 and the loaded
; value, and stores the result back to %out. Each run configuration's checks
; below expect eight v_writelane_b32 instructions.
3086define void @test_writelane_v4f64(ptr addrspace(1) %out, <4 x double> %src, i32 %src1) {
3087; GFX802-SDAG-LABEL: test_writelane_v4f64:
3088; GFX802-SDAG: ; %bb.0:
3089; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3090; GFX802-SDAG-NEXT: v_add_u32_e32 v19, vcc, 16, v0
3091; GFX802-SDAG-NEXT: flat_load_dwordx4 v[11:14], v[0:1]
3092; GFX802-SDAG-NEXT: v_addc_u32_e32 v20, vcc, 0, v1, vcc
3093; GFX802-SDAG-NEXT: flat_load_dwordx4 v[15:18], v[19:20]
3094; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v10
3095; GFX802-SDAG-NEXT: v_readfirstlane_b32 s9, v5
3096; GFX802-SDAG-NEXT: v_readfirstlane_b32 s10, v4
3097; GFX802-SDAG-NEXT: v_readfirstlane_b32 s11, v3
3098; GFX802-SDAG-NEXT: v_readfirstlane_b32 s12, v2
3099; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
3100; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v9
3101; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v8
3102; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v7
3103; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v6
3104; GFX802-SDAG-NEXT: s_waitcnt vmcnt(1)
3105; GFX802-SDAG-NEXT: v_writelane_b32 v14, s9, m0
3106; GFX802-SDAG-NEXT: v_writelane_b32 v13, s10, m0
3107; GFX802-SDAG-NEXT: v_writelane_b32 v12, s11, m0
3108; GFX802-SDAG-NEXT: v_writelane_b32 v11, s12, m0
3109; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
3110; GFX802-SDAG-NEXT: v_writelane_b32 v18, s5, m0
3111; GFX802-SDAG-NEXT: v_writelane_b32 v17, s6, m0
3112; GFX802-SDAG-NEXT: v_writelane_b32 v16, s7, m0
3113; GFX802-SDAG-NEXT: v_writelane_b32 v15, s8, m0
3114; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[11:14]
3115; GFX802-SDAG-NEXT: flat_store_dwordx4 v[19:20], v[15:18]
3116; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
3117; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
3118;
3119; GFX1010-SDAG-LABEL: test_writelane_v4f64:
3120; GFX1010-SDAG: ; %bb.0:
3121; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3122; GFX1010-SDAG-NEXT: s_clause 0x1
3123; GFX1010-SDAG-NEXT: global_load_dwordx4 v[11:14], v[0:1], off offset:16
3124; GFX1010-SDAG-NEXT: global_load_dwordx4 v[15:18], v[0:1], off
3125; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v10
3126; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s9, v5
3127; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s10, v4
3128; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s11, v3
3129; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s12, v2
3130; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v9
3131; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v8
3132; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v7
3133; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v6
3134; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(1)
3135; GFX1010-SDAG-NEXT: v_writelane_b32 v14, s4, s5
3136; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
3137; GFX1010-SDAG-NEXT: v_writelane_b32 v18, s9, s5
3138; GFX1010-SDAG-NEXT: v_writelane_b32 v17, s10, s5
3139; GFX1010-SDAG-NEXT: v_writelane_b32 v16, s11, s5
3140; GFX1010-SDAG-NEXT: v_writelane_b32 v15, s12, s5
3141; GFX1010-SDAG-NEXT: v_writelane_b32 v13, s6, s5
3142; GFX1010-SDAG-NEXT: v_writelane_b32 v12, s7, s5
3143; GFX1010-SDAG-NEXT: v_writelane_b32 v11, s8, s5
3144; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[15:18], off
3145; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[11:14], off offset:16
3146; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
3147;
3148; GFX1100-SDAG-LABEL: test_writelane_v4f64:
3149; GFX1100-SDAG: ; %bb.0:
3150; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3151; GFX1100-SDAG-NEXT: s_clause 0x1
3152; GFX1100-SDAG-NEXT: global_load_b128 v[11:14], v[0:1], off offset:16
3153; GFX1100-SDAG-NEXT: global_load_b128 v[15:18], v[0:1], off
3154; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v10
3155; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s5, v5
3156; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s6, v4
3157; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s7, v3
3158; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s8, v2
3159; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v9
3160; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v8
3161; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v7
3162; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v6
3163; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(1)
3164; GFX1100-SDAG-NEXT: v_writelane_b32 v14, s0, s1
3165; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
3166; GFX1100-SDAG-NEXT: v_writelane_b32 v18, s5, s1
3167; GFX1100-SDAG-NEXT: v_writelane_b32 v17, s6, s1
3168; GFX1100-SDAG-NEXT: v_writelane_b32 v16, s7, s1
3169; GFX1100-SDAG-NEXT: v_writelane_b32 v15, s8, s1
3170; GFX1100-SDAG-NEXT: v_writelane_b32 v13, s2, s1
3171; GFX1100-SDAG-NEXT: v_writelane_b32 v12, s3, s1
3172; GFX1100-SDAG-NEXT: v_writelane_b32 v11, s4, s1
3173; GFX1100-SDAG-NEXT: s_clause 0x1
3174; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[15:18], off
3175; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[11:14], off offset:16
3176; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3177;
3178; GFX802-GISEL-LABEL: test_writelane_v4f64:
3179; GFX802-GISEL: ; %bb.0:
3180; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3181; GFX802-GISEL-NEXT: v_add_u32_e32 v19, vcc, 16, v0
3182; GFX802-GISEL-NEXT: flat_load_dwordx4 v[11:14], v[0:1]
3183; GFX802-GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v1, vcc
3184; GFX802-GISEL-NEXT: flat_load_dwordx4 v[15:18], v[19:20]
3185; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v10
3186; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
3187; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
3188; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
3189; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
3190; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
3191; GFX802-GISEL-NEXT: v_readfirstlane_b32 s9, v6
3192; GFX802-GISEL-NEXT: v_readfirstlane_b32 s10, v7
3193; GFX802-GISEL-NEXT: v_readfirstlane_b32 s11, v8
3194; GFX802-GISEL-NEXT: v_readfirstlane_b32 s12, v9
3195; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
3196; GFX802-GISEL-NEXT: v_writelane_b32 v11, s4, m0
3197; GFX802-GISEL-NEXT: v_writelane_b32 v12, s6, m0
3198; GFX802-GISEL-NEXT: v_writelane_b32 v13, s7, m0
3199; GFX802-GISEL-NEXT: v_writelane_b32 v14, s8, m0
3200; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
3201; GFX802-GISEL-NEXT: v_writelane_b32 v15, s9, m0
3202; GFX802-GISEL-NEXT: v_writelane_b32 v16, s10, m0
3203; GFX802-GISEL-NEXT: v_writelane_b32 v17, s11, m0
3204; GFX802-GISEL-NEXT: v_writelane_b32 v18, s12, m0
3205; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[11:14]
3206; GFX802-GISEL-NEXT: flat_store_dwordx4 v[19:20], v[15:18]
3207; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
3208; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
3209;
3210; GFX1010-GISEL-LABEL: test_writelane_v4f64:
3211; GFX1010-GISEL: ; %bb.0:
3212; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3213; GFX1010-GISEL-NEXT: s_clause 0x1
3214; GFX1010-GISEL-NEXT: global_load_dwordx4 v[11:14], v[0:1], off
3215; GFX1010-GISEL-NEXT: global_load_dwordx4 v[15:18], v[0:1], off offset:16
3216; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
3217; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v10
3218; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
3219; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
3220; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
3221; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v6
3222; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v7
3223; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s11, v8
3224; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s12, v9
3225; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(1)
3226; GFX1010-GISEL-NEXT: v_writelane_b32 v11, s4, s5
3227; GFX1010-GISEL-NEXT: v_writelane_b32 v12, s6, s5
3228; GFX1010-GISEL-NEXT: v_writelane_b32 v13, s7, s5
3229; GFX1010-GISEL-NEXT: v_writelane_b32 v14, s8, s5
3230; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
3231; GFX1010-GISEL-NEXT: v_writelane_b32 v15, s9, s5
3232; GFX1010-GISEL-NEXT: v_writelane_b32 v16, s10, s5
3233; GFX1010-GISEL-NEXT: v_writelane_b32 v17, s11, s5
3234; GFX1010-GISEL-NEXT: v_writelane_b32 v18, s12, s5
3235; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off
3236; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off offset:16
3237; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
3238;
3239; GFX1100-GISEL-LABEL: test_writelane_v4f64:
3240; GFX1100-GISEL: ; %bb.0:
3241; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3242; GFX1100-GISEL-NEXT: s_clause 0x1
3243; GFX1100-GISEL-NEXT: global_load_b128 v[11:14], v[0:1], off
3244; GFX1100-GISEL-NEXT: global_load_b128 v[15:18], v[0:1], off offset:16
3245; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
3246; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v10
3247; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
3248; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
3249; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
3250; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v6
3251; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v7
3252; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s7, v8
3253; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s8, v9
3254; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(1)
3255; GFX1100-GISEL-NEXT: v_writelane_b32 v11, s0, s1
3256; GFX1100-GISEL-NEXT: v_writelane_b32 v12, s2, s1
3257; GFX1100-GISEL-NEXT: v_writelane_b32 v13, s3, s1
3258; GFX1100-GISEL-NEXT: v_writelane_b32 v14, s4, s1
3259; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
3260; GFX1100-GISEL-NEXT: v_writelane_b32 v15, s5, s1
3261; GFX1100-GISEL-NEXT: v_writelane_b32 v16, s6, s1
3262; GFX1100-GISEL-NEXT: v_writelane_b32 v17, s7, s1
3263; GFX1100-GISEL-NEXT: v_writelane_b32 v18, s8, s1
3264; GFX1100-GISEL-NEXT: s_clause 0x1
3265; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[11:14], off
3266; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[15:18], off offset:16
3267; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3268 %oldval = load <4 x double>, ptr addrspace(1) %out
3269 %writelane = call <4 x double> @llvm.amdgcn.writelane.v4f64(<4 x double> %src, i32 %src1, <4 x double> %oldval)
3270 store <4 x double> %writelane, ptr addrspace(1) %out, align 4
3271 ret void
3272}
3273
3274define void @test_writelane_v8f64(ptr addrspace(1) %out, <8 x double> %src, i32 %src1) {
3275; GFX802-SDAG-LABEL: test_writelane_v8f64:
3276; GFX802-SDAG: ; %bb.0:
3277; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3278; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v18
3279; GFX802-SDAG-NEXT: flat_load_dwordx4 v[18:21], v[0:1]
3280; GFX802-SDAG-NEXT: v_add_u32_e32 v22, vcc, 16, v0
3281; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v5
3282; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v4
3283; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v3
3284; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v2
3285; GFX802-SDAG-NEXT: v_addc_u32_e32 v23, vcc, 0, v1, vcc
3286; GFX802-SDAG-NEXT: flat_load_dwordx4 v[2:5], v[22:23]
3287; GFX802-SDAG-NEXT: s_mov_b32 m0, s4
3288; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v9
3289; GFX802-SDAG-NEXT: v_readfirstlane_b32 s10, v15
3290; GFX802-SDAG-NEXT: v_readfirstlane_b32 s11, v14
3291; GFX802-SDAG-NEXT: v_readfirstlane_b32 s12, v13
3292; GFX802-SDAG-NEXT: v_readfirstlane_b32 s13, v12
3293; GFX802-SDAG-NEXT: v_readfirstlane_b32 s14, v11
3294; GFX802-SDAG-NEXT: v_readfirstlane_b32 s15, v10
3295; GFX802-SDAG-NEXT: v_readfirstlane_b32 s9, v16
3296; GFX802-SDAG-NEXT: s_waitcnt vmcnt(1)
3297; GFX802-SDAG-NEXT: v_writelane_b32 v21, s5, m0
3298; GFX802-SDAG-NEXT: v_writelane_b32 v20, s6, m0
3299; GFX802-SDAG-NEXT: v_writelane_b32 v19, s7, m0
3300; GFX802-SDAG-NEXT: v_writelane_b32 v18, s8, m0
3301; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[18:21]
3302; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v8
3303; GFX802-SDAG-NEXT: v_add_u32_e32 v18, vcc, 32, v0
3304; GFX802-SDAG-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
3305; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 48, v0
3306; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3307; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v7
3308; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v6
3309; GFX802-SDAG-NEXT: flat_load_dwordx4 v[6:9], v[0:1]
3310; GFX802-SDAG-NEXT: flat_load_dwordx4 v[12:15], v[18:19]
3311; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v17
3312; GFX802-SDAG-NEXT: s_waitcnt vmcnt(3)
3313; GFX802-SDAG-NEXT: v_writelane_b32 v5, s4, m0
3314; GFX802-SDAG-NEXT: v_writelane_b32 v4, s5, m0
3315; GFX802-SDAG-NEXT: v_writelane_b32 v3, s6, m0
3316; GFX802-SDAG-NEXT: v_writelane_b32 v2, s7, m0
3317; GFX802-SDAG-NEXT: s_waitcnt vmcnt(1)
3318; GFX802-SDAG-NEXT: v_writelane_b32 v9, s8, m0
3319; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
3320; GFX802-SDAG-NEXT: v_writelane_b32 v15, s12, m0
3321; GFX802-SDAG-NEXT: v_writelane_b32 v14, s13, m0
3322; GFX802-SDAG-NEXT: v_writelane_b32 v13, s14, m0
3323; GFX802-SDAG-NEXT: v_writelane_b32 v12, s15, m0
3324; GFX802-SDAG-NEXT: v_writelane_b32 v8, s9, m0
3325; GFX802-SDAG-NEXT: v_writelane_b32 v7, s10, m0
3326; GFX802-SDAG-NEXT: v_writelane_b32 v6, s11, m0
3327; GFX802-SDAG-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
3328; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[6:9]
3329; GFX802-SDAG-NEXT: flat_store_dwordx4 v[22:23], v[2:5]
3330; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
3331; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
3332;
3333; GFX1010-SDAG-LABEL: test_writelane_v8f64:
3334; GFX1010-SDAG: ; %bb.0:
3335; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3336; GFX1010-SDAG-NEXT: s_clause 0x3
3337; GFX1010-SDAG-NEXT: global_load_dwordx4 v[19:22], v[0:1], off offset:16
3338; GFX1010-SDAG-NEXT: global_load_dwordx4 v[23:26], v[0:1], off
3339; GFX1010-SDAG-NEXT: global_load_dwordx4 v[27:30], v[0:1], off offset:48
3340; GFX1010-SDAG-NEXT: global_load_dwordx4 v[31:34], v[0:1], off offset:32
3341; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v18
3342; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s17, v13
3343; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s18, v12
3344; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s19, v11
3345; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s20, v10
3346; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s13, v17
3347; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s14, v16
3348; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s15, v15
3349; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s16, v14
3350; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s9, v5
3351; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s10, v4
3352; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s11, v3
3353; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s12, v2
3354; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v9
3355; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v8
3356; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v7
3357; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v6
3358; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(3)
3359; GFX1010-SDAG-NEXT: v_writelane_b32 v22, s4, s5
3360; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(2)
3361; GFX1010-SDAG-NEXT: v_writelane_b32 v26, s9, s5
3362; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(1)
3363; GFX1010-SDAG-NEXT: v_writelane_b32 v30, s13, s5
3364; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
3365; GFX1010-SDAG-NEXT: v_writelane_b32 v34, s17, s5
3366; GFX1010-SDAG-NEXT: v_writelane_b32 v33, s18, s5
3367; GFX1010-SDAG-NEXT: v_writelane_b32 v32, s19, s5
3368; GFX1010-SDAG-NEXT: v_writelane_b32 v31, s20, s5
3369; GFX1010-SDAG-NEXT: v_writelane_b32 v29, s14, s5
3370; GFX1010-SDAG-NEXT: v_writelane_b32 v28, s15, s5
3371; GFX1010-SDAG-NEXT: v_writelane_b32 v27, s16, s5
3372; GFX1010-SDAG-NEXT: v_writelane_b32 v25, s10, s5
3373; GFX1010-SDAG-NEXT: v_writelane_b32 v24, s11, s5
3374; GFX1010-SDAG-NEXT: v_writelane_b32 v23, s12, s5
3375; GFX1010-SDAG-NEXT: v_writelane_b32 v21, s6, s5
3376; GFX1010-SDAG-NEXT: v_writelane_b32 v20, s7, s5
3377; GFX1010-SDAG-NEXT: v_writelane_b32 v19, s8, s5
3378; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[31:34], off offset:32
3379; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[27:30], off offset:48
3380; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[23:26], off
3381; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[19:22], off offset:16
3382; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
3383;
3384; GFX1100-SDAG-LABEL: test_writelane_v8f64:
3385; GFX1100-SDAG: ; %bb.0:
3386; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3387; GFX1100-SDAG-NEXT: s_clause 0x3
3388; GFX1100-SDAG-NEXT: global_load_b128 v[19:22], v[0:1], off offset:16
3389; GFX1100-SDAG-NEXT: global_load_b128 v[23:26], v[0:1], off
3390; GFX1100-SDAG-NEXT: global_load_b128 v[27:30], v[0:1], off offset:48
3391; GFX1100-SDAG-NEXT: global_load_b128 v[31:34], v[0:1], off offset:32
3392; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v18
3393; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s13, v13
3394; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s14, v12
3395; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s15, v11
3396; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s16, v10
3397; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s9, v17
3398; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s10, v16
3399; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s11, v15
3400; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s12, v14
3401; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s5, v5
3402; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s6, v4
3403; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s7, v3
3404; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s8, v2
3405; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v9
3406; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v8
3407; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v7
3408; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v6
3409; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(3)
3410; GFX1100-SDAG-NEXT: v_writelane_b32 v22, s0, s1
3411; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(2)
3412; GFX1100-SDAG-NEXT: v_writelane_b32 v26, s5, s1
3413; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(1)
3414; GFX1100-SDAG-NEXT: v_writelane_b32 v30, s9, s1
3415; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
3416; GFX1100-SDAG-NEXT: v_writelane_b32 v34, s13, s1
3417; GFX1100-SDAG-NEXT: v_writelane_b32 v33, s14, s1
3418; GFX1100-SDAG-NEXT: v_writelane_b32 v32, s15, s1
3419; GFX1100-SDAG-NEXT: v_writelane_b32 v31, s16, s1
3420; GFX1100-SDAG-NEXT: v_writelane_b32 v29, s10, s1
3421; GFX1100-SDAG-NEXT: v_writelane_b32 v28, s11, s1
3422; GFX1100-SDAG-NEXT: v_writelane_b32 v27, s12, s1
3423; GFX1100-SDAG-NEXT: v_writelane_b32 v25, s6, s1
3424; GFX1100-SDAG-NEXT: v_writelane_b32 v24, s7, s1
3425; GFX1100-SDAG-NEXT: v_writelane_b32 v23, s8, s1
3426; GFX1100-SDAG-NEXT: v_writelane_b32 v21, s2, s1
3427; GFX1100-SDAG-NEXT: v_writelane_b32 v20, s3, s1
3428; GFX1100-SDAG-NEXT: v_writelane_b32 v19, s4, s1
3429; GFX1100-SDAG-NEXT: s_clause 0x3
3430; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[31:34], off offset:32
3431; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[27:30], off offset:48
3432; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[23:26], off
3433; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[19:22], off offset:16
3434; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
3435;
3436; GFX802-GISEL-LABEL: test_writelane_v8f64:
3437; GFX802-GISEL: ; %bb.0:
3438; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3439; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v18
3440; GFX802-GISEL-NEXT: flat_load_dwordx4 v[18:21], v[0:1]
3441; GFX802-GISEL-NEXT: v_add_u32_e32 v22, vcc, 16, v0
3442; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
3443; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
3444; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
3445; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
3446; GFX802-GISEL-NEXT: v_addc_u32_e32 v23, vcc, 0, v1, vcc
3447; GFX802-GISEL-NEXT: flat_load_dwordx4 v[2:5], v[22:23]
3448; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
3449; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v7
3450; GFX802-GISEL-NEXT: v_readfirstlane_b32 s9, v11
3451; GFX802-GISEL-NEXT: v_readfirstlane_b32 s10, v12
3452; GFX802-GISEL-NEXT: v_readfirstlane_b32 s11, v13
3453; GFX802-GISEL-NEXT: v_readfirstlane_b32 s12, v14
3454; GFX802-GISEL-NEXT: v_readfirstlane_b32 s13, v15
3455; GFX802-GISEL-NEXT: v_readfirstlane_b32 s14, v16
3456; GFX802-GISEL-NEXT: v_readfirstlane_b32 s15, v17
3457; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
3458; GFX802-GISEL-NEXT: v_writelane_b32 v18, s4, m0
3459; GFX802-GISEL-NEXT: v_writelane_b32 v19, s6, m0
3460; GFX802-GISEL-NEXT: v_writelane_b32 v20, s7, m0
3461; GFX802-GISEL-NEXT: v_writelane_b32 v21, s8, m0
3462; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[18:21]
3463; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v6
3464; GFX802-GISEL-NEXT: v_add_u32_e32 v18, vcc, 32, v0
3465; GFX802-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
3466; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
3467; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3468; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v8
3469; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v9
3470; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v10
3471; GFX802-GISEL-NEXT: flat_load_dwordx4 v[6:9], v[18:19]
3472; GFX802-GISEL-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
3473; GFX802-GISEL-NEXT: s_waitcnt vmcnt(3)
3474; GFX802-GISEL-NEXT: v_writelane_b32 v2, s4, m0
3475; GFX802-GISEL-NEXT: v_writelane_b32 v3, s5, m0
3476; GFX802-GISEL-NEXT: v_writelane_b32 v4, s6, m0
3477; GFX802-GISEL-NEXT: v_writelane_b32 v5, s7, m0
3478; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
3479; GFX802-GISEL-NEXT: v_writelane_b32 v6, s8, m0
3480; GFX802-GISEL-NEXT: v_writelane_b32 v7, s9, m0
3481; GFX802-GISEL-NEXT: v_writelane_b32 v8, s10, m0
3482; GFX802-GISEL-NEXT: v_writelane_b32 v9, s11, m0
3483; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
3484; GFX802-GISEL-NEXT: v_writelane_b32 v10, s12, m0
3485; GFX802-GISEL-NEXT: v_writelane_b32 v11, s13, m0
3486; GFX802-GISEL-NEXT: v_writelane_b32 v12, s14, m0
3487; GFX802-GISEL-NEXT: v_writelane_b32 v13, s15, m0
3488; GFX802-GISEL-NEXT: flat_store_dwordx4 v[22:23], v[2:5]
3489; GFX802-GISEL-NEXT: flat_store_dwordx4 v[18:19], v[6:9]
3490; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[10:13]
3491; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
3492; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
3493;
3494; GFX1010-GISEL-LABEL: test_writelane_v8f64:
3495; GFX1010-GISEL: ; %bb.0:
3496; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3497; GFX1010-GISEL-NEXT: s_clause 0x3
3498; GFX1010-GISEL-NEXT: global_load_dwordx4 v[19:22], v[0:1], off
3499; GFX1010-GISEL-NEXT: global_load_dwordx4 v[23:26], v[0:1], off offset:16
3500; GFX1010-GISEL-NEXT: global_load_dwordx4 v[27:30], v[0:1], off offset:32
3501; GFX1010-GISEL-NEXT: global_load_dwordx4 v[31:34], v[0:1], off offset:48
3502; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
3503; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v18
3504; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
3505; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
3506; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
3507; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v6
3508; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v7
3509; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s11, v8
3510; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s12, v9
3511; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s13, v10
3512; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s14, v11
3513; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s15, v12
3514; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s16, v13
3515; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s17, v14
3516; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s18, v15
3517; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s19, v16
3518; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s20, v17
3519; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(3)
3520; GFX1010-GISEL-NEXT: v_writelane_b32 v19, s4, s5
3521; GFX1010-GISEL-NEXT: v_writelane_b32 v20, s6, s5
3522; GFX1010-GISEL-NEXT: v_writelane_b32 v21, s7, s5
3523; GFX1010-GISEL-NEXT: v_writelane_b32 v22, s8, s5
3524; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(2)
3525; GFX1010-GISEL-NEXT: v_writelane_b32 v23, s9, s5
3526; GFX1010-GISEL-NEXT: v_writelane_b32 v24, s10, s5
3527; GFX1010-GISEL-NEXT: v_writelane_b32 v25, s11, s5
3528; GFX1010-GISEL-NEXT: v_writelane_b32 v26, s12, s5
3529; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(1)
3530; GFX1010-GISEL-NEXT: v_writelane_b32 v27, s13, s5
3531; GFX1010-GISEL-NEXT: v_writelane_b32 v28, s14, s5
3532; GFX1010-GISEL-NEXT: v_writelane_b32 v29, s15, s5
3533; GFX1010-GISEL-NEXT: v_writelane_b32 v30, s16, s5
3534; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
3535; GFX1010-GISEL-NEXT: v_writelane_b32 v31, s17, s5
3536; GFX1010-GISEL-NEXT: v_writelane_b32 v32, s18, s5
3537; GFX1010-GISEL-NEXT: v_writelane_b32 v33, s19, s5
3538; GFX1010-GISEL-NEXT: v_writelane_b32 v34, s20, s5
3539; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[19:22], off
3540; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[23:26], off offset:16
3541; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[27:30], off offset:32
3542; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[31:34], off offset:48
3543; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
3544;
3545; GFX1100-GISEL-LABEL: test_writelane_v8f64:
3546; GFX1100-GISEL: ; %bb.0:
3547; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3548; GFX1100-GISEL-NEXT: s_clause 0x3
3549; GFX1100-GISEL-NEXT: global_load_b128 v[19:22], v[0:1], off
3550; GFX1100-GISEL-NEXT: global_load_b128 v[23:26], v[0:1], off offset:16
3551; GFX1100-GISEL-NEXT: global_load_b128 v[27:30], v[0:1], off offset:32
3552; GFX1100-GISEL-NEXT: global_load_b128 v[31:34], v[0:1], off offset:48
3553; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
3554; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v18
3555; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
3556; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
3557; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
3558; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v6
3559; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v7
3560; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s7, v8
3561; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s8, v9
3562; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s9, v10
3563; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s10, v11
3564; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s11, v12
3565; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s12, v13
3566; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s13, v14
3567; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s14, v15
3568; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s15, v16
3569; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s16, v17
3570; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(3)
3571; GFX1100-GISEL-NEXT: v_writelane_b32 v19, s0, s1
3572; GFX1100-GISEL-NEXT: v_writelane_b32 v20, s2, s1
3573; GFX1100-GISEL-NEXT: v_writelane_b32 v21, s3, s1
3574; GFX1100-GISEL-NEXT: v_writelane_b32 v22, s4, s1
3575; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(2)
3576; GFX1100-GISEL-NEXT: v_writelane_b32 v23, s5, s1
3577; GFX1100-GISEL-NEXT: v_writelane_b32 v24, s6, s1
3578; GFX1100-GISEL-NEXT: v_writelane_b32 v25, s7, s1
3579; GFX1100-GISEL-NEXT: v_writelane_b32 v26, s8, s1
3580; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(1)
3581; GFX1100-GISEL-NEXT: v_writelane_b32 v27, s9, s1
3582; GFX1100-GISEL-NEXT: v_writelane_b32 v28, s10, s1
3583; GFX1100-GISEL-NEXT: v_writelane_b32 v29, s11, s1
3584; GFX1100-GISEL-NEXT: v_writelane_b32 v30, s12, s1
3585; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
3586; GFX1100-GISEL-NEXT: v_writelane_b32 v31, s13, s1
3587; GFX1100-GISEL-NEXT: v_writelane_b32 v32, s14, s1
3588; GFX1100-GISEL-NEXT: v_writelane_b32 v33, s15, s1
3589; GFX1100-GISEL-NEXT: v_writelane_b32 v34, s16, s1
3590; GFX1100-GISEL-NEXT: s_clause 0x3
3591; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[19:22], off
3592; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[23:26], off offset:16
3593; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[27:30], off offset:32
3594; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[31:34], off offset:48
3595; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
3596 %oldval = load <8 x double>, ptr addrspace(1) %out
3597 %writelane = call <8 x double> @llvm.amdgcn.writelane.v8f64(<8 x double> %src, i32 %src1, <8 x double> %oldval)
3598 store <8 x double> %writelane, ptr addrspace(1) %out, align 4
3599 ret void
3600}
3601
; Declares @llvm.amdgcn.workitem.id.x (no arguments, returns i32) with attribute group #2.
Tim Renouf2a99fa22018-02-28 19:10:32 +00003602declare i32 @llvm.amdgcn.workitem.id.x() #2
3603
; Attribute groups referenced by "#N" on declarations and definitions in this file.
; #0 - used by the @llvm.amdgcn.writelane* declarations near the top of the file.
3604attributes #0 = { nounwind readnone convergent }
; #1 - used by @test_writelane_sreg_i32 (presumably the other test definitions too; verify).
3605attributes #1 = { nounwind }
; #2 - used by the @llvm.amdgcn.workitem.id.x declaration.
3606attributes #2 = { nounwind readnone }