blob: 96b11071c6be534096201d7d7ac1b8a2319f80e8 [file] [log] [blame]
; The three amdgcn runs other than gfx900 carry the SICIV prefix so that the
; "s_mov_b32 m0" checks in this file are actually evaluated; the gfx900 run
; uses GFX9 instead, whose checks require that m0 is not mentioned.
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIV,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIV,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIV,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s

; Testing for ds_read_b128
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
Farhana Aleena7cb3112018-03-09 17:41:39 +000010
Matt Arsenaultd1097a32016-06-02 19:54:26 +000011; FUNC-LABEL: {{^}}local_load_f64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000012; SICIV: s_mov_b32 m0
13; GFX9-NOT: m0
14
Matt Arsenaultd1097a32016-06-02 19:54:26 +000015; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
16; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
17
18; EG: LDS_READ_RET
19; EG: LDS_READ_RET
; Kernel under test: loads one double through %in and stores it through %out;
; both pointers are in addrspace(3).
define amdgpu_kernel void @local_load_f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
  %val = load double, ptr addrspace(3) %in
  store double %val, ptr addrspace(3) %out
  ret void
}
25
26; FUNC-LABEL: {{^}}local_load_v2f64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000027; SICIV: s_mov_b32 m0
28; GFX9-NOT: m0
29
Matt Arsenaultd1097a32016-06-02 19:54:26 +000030; GCN: ds_read2_b64
31
32; EG: LDS_READ_RET
33; EG: LDS_READ_RET
34; EG: LDS_READ_RET
35; EG: LDS_READ_RET
; Kernel under test: loads a <2 x double> through %in and stores it through
; %out; both pointers are in addrspace(3). No explicit alignment is given.
define amdgpu_kernel void @local_load_v2f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
entry:
  %vec = load <2 x double>, ptr addrspace(3) %in
  store <2 x double> %vec, ptr addrspace(3) %out
  ret void
}
42
43; FUNC-LABEL: {{^}}local_load_v3f64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000044; SICIV: s_mov_b32 m0
45; GFX9-NOT: m0
46
Matt Arsenaultd1097a32016-06-02 19:54:26 +000047; GCN-DAG: ds_read2_b64
48; GCN-DAG: ds_read_b64
49
50; EG: LDS_READ_RET
51; EG: LDS_READ_RET
52; EG: LDS_READ_RET
53; EG: LDS_READ_RET
54; EG: LDS_READ_RET
55; EG: LDS_READ_RET
; Kernel under test: loads a <3 x double> through %in and stores it through
; %out; both pointers are in addrspace(3).
define amdgpu_kernel void @local_load_v3f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
entry:
  %vec = load <3 x double>, ptr addrspace(3) %in
  store <3 x double> %vec, ptr addrspace(3) %out
  ret void
}
62
63; FUNC-LABEL: {{^}}local_load_v4f64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000064; SICIV: s_mov_b32 m0
65; GFX9-NOT: m0
66
Matt Arsenaultd1097a32016-06-02 19:54:26 +000067; GCN: ds_read2_b64
68; GCN: ds_read2_b64
69
70; EG: LDS_READ_RET
71; EG: LDS_READ_RET
72; EG: LDS_READ_RET
73; EG: LDS_READ_RET
74
75; EG: LDS_READ_RET
76; EG: LDS_READ_RET
77; EG: LDS_READ_RET
78; EG: LDS_READ_RET
; Kernel under test: loads a <4 x double> through %in and stores it through
; %out; both pointers are in addrspace(3).
define amdgpu_kernel void @local_load_v4f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
entry:
  %vec = load <4 x double>, ptr addrspace(3) %in
  store <4 x double> %vec, ptr addrspace(3) %out
  ret void
}
85
86; FUNC-LABEL: {{^}}local_load_v8f64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000087; SICIV: s_mov_b32 m0
88; GFX9-NOT: m0
89
Matt Arsenaultd1097a32016-06-02 19:54:26 +000090; GCN: ds_read2_b64
91; GCN: ds_read2_b64
92; GCN: ds_read2_b64
93; GCN: ds_read2_b64
94
95; EG: LDS_READ_RET
96; EG: LDS_READ_RET
97; EG: LDS_READ_RET
98; EG: LDS_READ_RET
99; EG: LDS_READ_RET
100; EG: LDS_READ_RET
101; EG: LDS_READ_RET
102; EG: LDS_READ_RET
103; EG: LDS_READ_RET
104; EG: LDS_READ_RET
105; EG: LDS_READ_RET
106; EG: LDS_READ_RET
107; EG: LDS_READ_RET
108; EG: LDS_READ_RET
109; EG: LDS_READ_RET
110; EG: LDS_READ_RET
; Kernel under test: loads an <8 x double> through %in and stores it through
; %out; both pointers are in addrspace(3).
define amdgpu_kernel void @local_load_v8f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
entry:
  %vec = load <8 x double>, ptr addrspace(3) %in
  store <8 x double> %vec, ptr addrspace(3) %out
  ret void
}
117
118; FUNC-LABEL: {{^}}local_load_v16f64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000119; SICIV: s_mov_b32 m0
120; GFX9-NOT: m0
121
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000122; GCN: ds_read2_b64
123; GCN: ds_read2_b64
124; GCN: ds_read2_b64
125; GCN: ds_read2_b64
126; GCN: ds_read2_b64
127; GCN: ds_read2_b64
128; GCN: ds_read2_b64
129; GCN: ds_read2_b64
130
131; EG: LDS_READ_RET
132; EG: LDS_READ_RET
133; EG: LDS_READ_RET
134; EG: LDS_READ_RET
135
136; EG: LDS_READ_RET
137; EG: LDS_READ_RET
138; EG: LDS_READ_RET
139; EG: LDS_READ_RET
140
141; EG: LDS_READ_RET
142; EG: LDS_READ_RET
143; EG: LDS_READ_RET
144; EG: LDS_READ_RET
145
146; EG: LDS_READ_RET
147; EG: LDS_READ_RET
148; EG: LDS_READ_RET
149; EG: LDS_READ_RET
150
151; EG: LDS_READ_RET
152; EG: LDS_READ_RET
153; EG: LDS_READ_RET
154; EG: LDS_READ_RET
155
156; EG: LDS_READ_RET
157; EG: LDS_READ_RET
158; EG: LDS_READ_RET
159; EG: LDS_READ_RET
160
161; EG: LDS_READ_RET
162; EG: LDS_READ_RET
163; EG: LDS_READ_RET
164; EG: LDS_READ_RET
165
166; EG: LDS_READ_RET
167; EG: LDS_READ_RET
168; EG: LDS_READ_RET
169; EG: LDS_READ_RET
; Kernel under test: loads a <16 x double> through %in and stores it through
; %out; both pointers are in addrspace(3).
define amdgpu_kernel void @local_load_v16f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
entry:
  %vec = load <16 x double>, ptr addrspace(3) %in
  store <16 x double> %vec, ptr addrspace(3) %out
  ret void
}
176
; Tests if ds_read_b128 gets generated for the 16-byte-aligned load.
178; FUNC-LABEL: {{^}}local_load_v2f64_to_128:
Farhana Aleenc6c9dc82018-03-16 18:12:00 +0000179
Farhana Aleena7cb3112018-03-09 17:41:39 +0000180; CIVI: ds_read_b128
Farhana Aleenc6c9dc82018-03-16 18:12:00 +0000181; CIVI: ds_write_b128
182
Farhana Aleena7cb3112018-03-09 17:41:39 +0000183; EG: LDS_READ_RET
184; EG: LDS_READ_RET
185; EG: LDS_READ_RET
186; EG: LDS_READ_RET
; Kernel under test: loads a <2 x double> through %in and stores it through
; %out, with both accesses explicitly marked align 16. Unlike the other
; kernels in this file, this one carries no attribute group.
define amdgpu_kernel void @local_load_v2f64_to_128(ptr addrspace(3) %out, ptr addrspace(3) %in) {
entry:
  %vec = load <2 x double>, ptr addrspace(3) %in, align 16
  store <2 x double> %vec, ptr addrspace(3) %out, align 16
  ret void
}
193
; Attribute group referenced as #0 by the kernels in this file that name it.
attributes #0 = { nounwind }