; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; Provenance (web-view residue): blob 838df340b402ecd47f0429704c0070a6f9837939, blame 008f26b by Manos Anagnostakis, 2023-09-14 17:58:39 +0300.
; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1a | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -O2 -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-DEFAULT
; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1 -mattr=+disable-ldp | FileCheck %s --check-prefixes=CHECK-DISABLE-LDP
; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1 -mattr=+disable-stp | FileCheck %s --check-prefixes=CHECK-DISABLE-STP
; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1a -mattr=+disable-ldp | FileCheck %s --check-prefixes=CHECK-DISABLE-LDP
; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1a -mattr=+disable-stp | FileCheck %s --check-prefixes=CHECK-DISABLE-STP

; Loads two adjacent i32 values starting at a 64-byte-aligned address (the
; incoming pointer is masked with -64) and returns their sum.
; Expected codegen per the assertions below: the ampere1/ampere1a runs and the
; default run pair the loads into a single ldp; the +disable-ldp runs keep two
; separate ldr instructions.
define i32 @ldp_aligned_int32_t(ptr %0) #0 {
; CHECK-LABEL: ldp_aligned_int32_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-NEXT:    ldp w9, w8, [x8]
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: ldp_aligned_int32_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-DEFAULT-NEXT:    ldp w9, w8, [x8]
; CHECK-DEFAULT-NEXT:    add w0, w8, w9
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-LDP-LABEL: ldp_aligned_int32_t:
; CHECK-DISABLE-LDP:       // %bb.0:
; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-DISABLE-LDP-NEXT:    ldr w9, [x8]
; CHECK-DISABLE-LDP-NEXT:    ldr w8, [x8, #4]
; CHECK-DISABLE-LDP-NEXT:    add w0, w8, w9
; CHECK-DISABLE-LDP-NEXT:    ret
  %2 = ptrtoint ptr %0 to i64
  %3 = and i64 %2, -64
  %4 = inttoptr i64 %3 to ptr
  %5 = load i32, ptr %4, align 64
  %6 = getelementptr inbounds i32, ptr %4, i64 1
  %7 = load i32, ptr %6, align 4
  %8 = add nsw i32 %7, %5
  ret i32 %8
}

; Loads two adjacent i64 values starting at a 128-byte-aligned address (the
; incoming pointer is masked with -128) and returns their sum.
; Expected codegen per the assertions below: the ampere1/ampere1a runs and the
; default run pair the loads into a single ldp; the +disable-ldp runs keep two
; separate ldr instructions.
define i64 @ldp_aligned_int64_t(ptr %0) #0 {
; CHECK-LABEL: ldp_aligned_int64_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-NEXT:    ldp x9, x8, [x8]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: ldp_aligned_int64_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-DEFAULT-NEXT:    ldp x9, x8, [x8]
; CHECK-DEFAULT-NEXT:    add x0, x8, x9
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-LDP-LABEL: ldp_aligned_int64_t:
; CHECK-DISABLE-LDP:       // %bb.0:
; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-DISABLE-LDP-NEXT:    ldr x9, [x8]
; CHECK-DISABLE-LDP-NEXT:    ldr x8, [x8, #8]
; CHECK-DISABLE-LDP-NEXT:    add x0, x8, x9
; CHECK-DISABLE-LDP-NEXT:    ret
  %2 = ptrtoint ptr %0 to i64
  %3 = and i64 %2, -128
  %4 = inttoptr i64 %3 to ptr
  %5 = load i64, ptr %4, align 128
  %6 = getelementptr inbounds i64, ptr %4, i64 1
  %7 = load i64, ptr %6, align 8
  %8 = add nsw i64 %7, %5
  ret i64 %8
}

; Loads two adjacent <4 x i32> vectors starting at a 256-byte-aligned address
; (the incoming pointer is masked with -256) and returns their element-wise sum.
; Expected codegen per the assertions below: the ampere1/ampere1a runs and the
; default run pair the loads into a single ldp of q registers; the +disable-ldp
; runs keep two separate ldr instructions.
define <4 x i32> @ldp_aligned_v4si(ptr %0) #0 {
; CHECK-LABEL: ldp_aligned_v4si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-NEXT:    ldp q0, q1, [x8]
; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: ldp_aligned_v4si:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-DEFAULT-NEXT:    ldp q0, q1, [x8]
; CHECK-DEFAULT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-LDP-LABEL: ldp_aligned_v4si:
; CHECK-DISABLE-LDP:       // %bb.0:
; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-DISABLE-LDP-NEXT:    ldr q0, [x8]
; CHECK-DISABLE-LDP-NEXT:    ldr q1, [x8, #16]
; CHECK-DISABLE-LDP-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-DISABLE-LDP-NEXT:    ret
  %2 = ptrtoint ptr %0 to i64
  %3 = and i64 %2, -256
  %4 = inttoptr i64 %3 to ptr
  %5 = load <4 x i32>, ptr %4, align 256
  %6 = getelementptr inbounds <4 x i32>, ptr %4, i64 1
  %7 = load <4 x i32>, ptr %6, align 16
  %8 = add <4 x i32> %7, %5
  ret <4 x i32> %8
}

; Loads the i32 values at element indices 1 and 2 (byte offsets 4 and 8) from a
; 64-byte-aligned base and returns their sum.
; Expected codegen per the assertions below: only the default run pairs the
; loads into an ldp; the ampere1/ampere1a runs and the +disable-ldp runs keep
; two separate ldr instructions.
; NOTE(review): the first load sits at base+4, so the pair is only 4-byte
; aligned; presumably that is why the ampere1 tuning declines to pair it --
; confirm against the subtarget's ldp policy.
define i32 @ldp_unaligned_int32_t(ptr %0) #0 {
; CHECK-LABEL: ldp_unaligned_int32_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-NEXT:    ldr w9, [x8, #4]
; CHECK-NEXT:    ldr w8, [x8, #8]
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: ldp_unaligned_int32_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-DEFAULT-NEXT:    ldp w9, w8, [x8, #4]
; CHECK-DEFAULT-NEXT:    add w0, w8, w9
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-LDP-LABEL: ldp_unaligned_int32_t:
; CHECK-DISABLE-LDP:       // %bb.0:
; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-DISABLE-LDP-NEXT:    ldr w9, [x8, #4]
; CHECK-DISABLE-LDP-NEXT:    ldr w8, [x8, #8]
; CHECK-DISABLE-LDP-NEXT:    add w0, w8, w9
; CHECK-DISABLE-LDP-NEXT:    ret
  %2 = ptrtoint ptr %0 to i64
  %3 = and i64 %2, -64
  %4 = inttoptr i64 %3 to ptr
  %5 = getelementptr inbounds i32, ptr %4, i64 1
  %6 = load i32, ptr %5, align 4
  %7 = getelementptr inbounds i32, ptr %4, i64 2
  %8 = load i32, ptr %7, align 8
  %9 = add nsw i32 %8, %6
  ret i32 %9
}

; Loads the i64 values at element indices 1 and 2 (byte offsets 8 and 16) from
; a 128-byte-aligned base and returns their sum.
; Expected codegen per the assertions below: only the default run pairs the
; loads into an ldp; the ampere1/ampere1a runs and the +disable-ldp runs keep
; two separate ldr instructions.
; NOTE(review): the first load sits at base+8, so the pair is only 8-byte
; aligned; presumably that is why the ampere1 tuning declines to pair it --
; confirm against the subtarget's ldp policy.
define i64 @ldp_unaligned_int64_t(ptr %0) #0 {
; CHECK-LABEL: ldp_unaligned_int64_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-NEXT:    ldr x9, [x8, #8]
; CHECK-NEXT:    ldr x8, [x8, #16]
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: ldp_unaligned_int64_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-DEFAULT-NEXT:    ldp x9, x8, [x8, #8]
; CHECK-DEFAULT-NEXT:    add x0, x8, x9
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-LDP-LABEL: ldp_unaligned_int64_t:
; CHECK-DISABLE-LDP:       // %bb.0:
; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-DISABLE-LDP-NEXT:    ldr x9, [x8, #8]
; CHECK-DISABLE-LDP-NEXT:    ldr x8, [x8, #16]
; CHECK-DISABLE-LDP-NEXT:    add x0, x8, x9
; CHECK-DISABLE-LDP-NEXT:    ret
  %2 = ptrtoint ptr %0 to i64
  %3 = and i64 %2, -128
  %4 = inttoptr i64 %3 to ptr
  %5 = getelementptr inbounds i64, ptr %4, i64 1
  %6 = load i64, ptr %5, align 8
  %7 = getelementptr inbounds i64, ptr %4, i64 2
  %8 = load i64, ptr %7, align 16
  %9 = add nsw i64 %8, %6
  ret i64 %9
}

; Loads the <4 x i32> vectors at element indices 1 and 2 (byte offsets 16 and
; 32) from a 256-byte-aligned base and returns their element-wise sum.
; Expected codegen per the assertions below: only the default run pairs the
; loads into an ldp; the ampere1/ampere1a runs and the +disable-ldp runs keep
; two separate ldr instructions.
; NOTE(review): the first load sits at base+16, so the pair is only 16-byte
; aligned; presumably that is why the ampere1 tuning declines to pair it --
; confirm against the subtarget's ldp policy.
define <4 x i32> @ldp_unaligned_v4si(ptr %0) #0 {
; CHECK-LABEL: ldp_unaligned_v4si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-NEXT:    ldr q0, [x8, #16]
; CHECK-NEXT:    ldr q1, [x8, #32]
; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: ldp_unaligned_v4si:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-DEFAULT-NEXT:    ldp q0, q1, [x8, #16]
; CHECK-DEFAULT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-LDP-LABEL: ldp_unaligned_v4si:
; CHECK-DISABLE-LDP:       // %bb.0:
; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-DISABLE-LDP-NEXT:    ldr q0, [x8, #16]
; CHECK-DISABLE-LDP-NEXT:    ldr q1, [x8, #32]
; CHECK-DISABLE-LDP-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-DISABLE-LDP-NEXT:    ret
  %2 = ptrtoint ptr %0 to i64
  %3 = and i64 %2, -256
  %4 = inttoptr i64 %3 to ptr
  %5 = getelementptr inbounds <4 x i32>, ptr %4, i64 1
  %6 = load <4 x i32>, ptr %5, align 16
  %7 = getelementptr inbounds <4 x i32>, ptr %4, i64 2
  %8 = load <4 x i32>, ptr %7, align 32
  %9 = add <4 x i32> %8, %6
  ret <4 x i32> %9
}

; Stores the i32 argument twice, at byte offsets 0 and 4 of a 64-byte-aligned
; address (the incoming pointer is masked with -64), and returns that aligned
; pointer.
; Expected codegen per the assertions below: the ampere1/ampere1a runs and the
; default run pair the stores into a single stp; the +disable-stp runs keep two
; separate str instructions.
define ptr @stp_aligned_int32_t(ptr %0, i32 %1) #0 {
; CHECK-LABEL: stp_aligned_int32_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x0, x0, #0xffffffffffffffc0
; CHECK-NEXT:    stp w1, w1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: stp_aligned_int32_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x0, x0, #0xffffffffffffffc0
; CHECK-DEFAULT-NEXT:    stp w1, w1, [x0]
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-STP-LABEL: stp_aligned_int32_t:
; CHECK-DISABLE-STP:       // %bb.0:
; CHECK-DISABLE-STP-NEXT:    and x0, x0, #0xffffffffffffffc0
; CHECK-DISABLE-STP-NEXT:    str w1, [x0]
; CHECK-DISABLE-STP-NEXT:    str w1, [x0, #4]
; CHECK-DISABLE-STP-NEXT:    ret
  %3 = ptrtoint ptr %0 to i64
  %4 = and i64 %3, -64
  %5 = inttoptr i64 %4 to ptr
  store i32 %1, ptr %5, align 64
  %6 = getelementptr inbounds i32, ptr %5, i64 1
  store i32 %1, ptr %6, align 4
  ret ptr %5
}

; Stores the i64 argument twice, at byte offsets 0 and 8 of a 128-byte-aligned
; address (the incoming pointer is masked with -128), and returns that aligned
; pointer.
; Expected codegen per the assertions below: the ampere1/ampere1a runs and the
; default run pair the stores into a single stp; the +disable-stp runs keep two
; separate str instructions.
define dso_local ptr @stp_aligned_int64_t(ptr %0, i64 %1) #0 {
; CHECK-LABEL: stp_aligned_int64_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x0, x0, #0xffffffffffffff80
; CHECK-NEXT:    stp x1, x1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: stp_aligned_int64_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x0, x0, #0xffffffffffffff80
; CHECK-DEFAULT-NEXT:    stp x1, x1, [x0]
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-STP-LABEL: stp_aligned_int64_t:
; CHECK-DISABLE-STP:       // %bb.0:
; CHECK-DISABLE-STP-NEXT:    and x0, x0, #0xffffffffffffff80
; CHECK-DISABLE-STP-NEXT:    str x1, [x0]
; CHECK-DISABLE-STP-NEXT:    str x1, [x0, #8]
; CHECK-DISABLE-STP-NEXT:    ret
  %3 = ptrtoint ptr %0 to i64
  %4 = and i64 %3, -128
  %5 = inttoptr i64 %4 to ptr
  store i64 %1, ptr %5, align 128
  %6 = getelementptr inbounds i64, ptr %5, i64 1
  store i64 %1, ptr %6, align 8
  ret ptr %5
}

; Stores the <4 x i32> argument twice, at byte offsets 0 and 16 of a
; 256-byte-aligned address (the incoming pointer is masked with -256), and
; returns that aligned pointer.
; Expected codegen per the assertions below: the ampere1/ampere1a runs and the
; default run pair the stores into a single stp of q registers; the
; +disable-stp runs keep two separate str instructions.
define ptr @stp_aligned_v4si(ptr %0, <4 x i32> %1) #0 {
; CHECK-LABEL: stp_aligned_v4si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x0, x0, #0xffffffffffffff00
; CHECK-NEXT:    stp q0, q0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: stp_aligned_v4si:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x0, x0, #0xffffffffffffff00
; CHECK-DEFAULT-NEXT:    stp q0, q0, [x0]
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-STP-LABEL: stp_aligned_v4si:
; CHECK-DISABLE-STP:       // %bb.0:
; CHECK-DISABLE-STP-NEXT:    and x0, x0, #0xffffffffffffff00
; CHECK-DISABLE-STP-NEXT:    str q0, [x0]
; CHECK-DISABLE-STP-NEXT:    str q0, [x0, #16]
; CHECK-DISABLE-STP-NEXT:    ret
  %3 = ptrtoint ptr %0 to i64
  %4 = and i64 %3, -256
  %5 = inttoptr i64 %4 to ptr
  store <4 x i32> %1, ptr %5, align 256
  %6 = getelementptr inbounds <4 x i32>, ptr %5, i64 1
  store <4 x i32> %1, ptr %6, align 16
  ret ptr %5
}

; Stores the i32 argument at element indices 1 and 2 (byte offsets 4 and 8) of
; a 64-byte-aligned base and returns the pointer to the first stored element
; (base+4, emitted as an orr with #0x4 in the expected output).
; Expected codegen per the assertions below: only the default run pairs the
; stores into an stp; the ampere1/ampere1a runs and the +disable-stp runs keep
; two separate str instructions.
; NOTE(review): the first store sits at base+4, so the pair is only 4-byte
; aligned; presumably that is why the ampere1 tuning declines to pair it --
; confirm against the subtarget's stp policy.
define ptr @stp_unaligned_int32_t(ptr %0, i32 %1) #0 {
; CHECK-LABEL: stp_unaligned_int32_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-NEXT:    orr x0, x8, #0x4
; CHECK-NEXT:    str w1, [x8, #4]
; CHECK-NEXT:    str w1, [x8, #8]
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: stp_unaligned_int32_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-DEFAULT-NEXT:    orr x0, x8, #0x4
; CHECK-DEFAULT-NEXT:    stp w1, w1, [x8, #4]
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-STP-LABEL: stp_unaligned_int32_t:
; CHECK-DISABLE-STP:       // %bb.0:
; CHECK-DISABLE-STP-NEXT:    and x8, x0, #0xffffffffffffffc0
; CHECK-DISABLE-STP-NEXT:    orr x0, x8, #0x4
; CHECK-DISABLE-STP-NEXT:    str w1, [x8, #4]
; CHECK-DISABLE-STP-NEXT:    str w1, [x8, #8]
; CHECK-DISABLE-STP-NEXT:    ret
  %3 = ptrtoint ptr %0 to i64
  %4 = and i64 %3, -64
  %5 = inttoptr i64 %4 to ptr
  %6 = getelementptr inbounds i32, ptr %5, i64 1
  store i32 %1, ptr %6, align 4
  %7 = getelementptr inbounds i32, ptr %5, i64 2
  store i32 %1, ptr %7, align 8
  ret ptr %6
}

; Stores the i64 argument at element indices 1 and 2 (byte offsets 8 and 16) of
; a 128-byte-aligned base and returns the pointer to the first stored element
; (base+8, emitted as an orr with #0x8 in the expected output).
; Expected codegen per the assertions below: only the default run pairs the
; stores into an stp; the ampere1/ampere1a runs and the +disable-stp runs keep
; two separate str instructions.
; NOTE(review): the first store sits at base+8, so the pair is only 8-byte
; aligned; presumably that is why the ampere1 tuning declines to pair it --
; confirm against the subtarget's stp policy.
define ptr @stp_unaligned_int64_t(ptr %0, i64 %1) #0 {
; CHECK-LABEL: stp_unaligned_int64_t:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-NEXT:    orr x0, x8, #0x8
; CHECK-NEXT:    str x1, [x8, #8]
; CHECK-NEXT:    str x1, [x8, #16]
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: stp_unaligned_int64_t:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-DEFAULT-NEXT:    orr x0, x8, #0x8
; CHECK-DEFAULT-NEXT:    stp x1, x1, [x8, #8]
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-STP-LABEL: stp_unaligned_int64_t:
; CHECK-DISABLE-STP:       // %bb.0:
; CHECK-DISABLE-STP-NEXT:    and x8, x0, #0xffffffffffffff80
; CHECK-DISABLE-STP-NEXT:    orr x0, x8, #0x8
; CHECK-DISABLE-STP-NEXT:    str x1, [x8, #8]
; CHECK-DISABLE-STP-NEXT:    str x1, [x8, #16]
; CHECK-DISABLE-STP-NEXT:    ret
  %3 = ptrtoint ptr %0 to i64
  %4 = and i64 %3, -128
  %5 = inttoptr i64 %4 to ptr
  %6 = getelementptr inbounds i64, ptr %5, i64 1
  store i64 %1, ptr %6, align 8
  %7 = getelementptr inbounds i64, ptr %5, i64 2
  store i64 %1, ptr %7, align 16
  ret ptr %6
}

; Stores the <4 x i32> argument at element indices 1 and 2 (byte offsets 16 and
; 32) of a 256-byte-aligned base and returns the pointer to the first stored
; element (base+16, emitted as an orr with #0x10 in the expected output).
; Expected codegen per the assertions below: only the default run pairs the
; stores into an stp; the ampere1/ampere1a runs and the +disable-stp runs keep
; two separate str instructions.
; NOTE(review): the first store sits at base+16, so the pair is only 16-byte
; aligned; presumably that is why the ampere1 tuning declines to pair it --
; confirm against the subtarget's stp policy.
define ptr @stp_unaligned_v4si(ptr %0, <4 x i32> %1) #0 {
; CHECK-LABEL: stp_unaligned_v4si:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-NEXT:    orr x0, x8, #0x10
; CHECK-NEXT:    str q0, [x8, #16]
; CHECK-NEXT:    str q0, [x8, #32]
; CHECK-NEXT:    ret
;
; CHECK-DEFAULT-LABEL: stp_unaligned_v4si:
; CHECK-DEFAULT:       // %bb.0:
; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-DEFAULT-NEXT:    orr x0, x8, #0x10
; CHECK-DEFAULT-NEXT:    stp q0, q0, [x8, #16]
; CHECK-DEFAULT-NEXT:    ret
;
; CHECK-DISABLE-STP-LABEL: stp_unaligned_v4si:
; CHECK-DISABLE-STP:       // %bb.0:
; CHECK-DISABLE-STP-NEXT:    and x8, x0, #0xffffffffffffff00
; CHECK-DISABLE-STP-NEXT:    orr x0, x8, #0x10
; CHECK-DISABLE-STP-NEXT:    str q0, [x8, #16]
; CHECK-DISABLE-STP-NEXT:    str q0, [x8, #32]
; CHECK-DISABLE-STP-NEXT:    ret
  %3 = ptrtoint ptr %0 to i64
  %4 = and i64 %3, -256
  %5 = inttoptr i64 %4 to ptr
  %6 = getelementptr inbounds <4 x i32>, ptr %5, i64 1
  store <4 x i32> %1, ptr %6, align 16
  %7 = getelementptr inbounds <4 x i32>, ptr %5, i64 2
  store <4 x i32> %1, ptr %7, align 32
  ret ptr %6
}