; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GISEL %s

; Check that in strict OOB mode for buffers (relaxed-buffer-oob-mode attribute not set) the underaligned loads and stores get split.
; FIXME: The loads/stores do not get split (extend amdgpu-lower-buffer-fat-pointers?).
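; One way to picture the expected split, as an IR-level sketch (illustrative only and
; not checked by FileCheck; the %lo/%hi names are hypothetical): the underaligned i64
; access would be performed as two naturally aligned i32 accesses, e.g.
;   %lo = load i32, ptr addrspace(7) %gep, align 4
;   %hi.gep = getelementptr i8, ptr addrspace(7) %gep, i32 4
;   %hi = load i32, ptr addrspace(7) %hi.gep, align 4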

define amdgpu_ps void @split_underaligned_load(ptr addrspace(7) inreg %p, ptr addrspace(7) inreg %p2) #0 {
; SDAG-LABEL: split_underaligned_load:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: v_mov_b32_e32 v0, s4
; SDAG-NEXT: v_mov_b32_e32 v2, s9
; SDAG-NEXT: s_mov_b32 s15, s8
; SDAG-NEXT: s_mov_b32 s14, s7
; SDAG-NEXT: s_mov_b32 s13, s6
; SDAG-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
; SDAG-NEXT: s_mov_b32 s12, s5
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: buffer_store_b64 v[0:1], v2, s[12:15], 0 offen
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: split_underaligned_load:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v2, s9
; GISEL-NEXT: s_mov_b32 s12, s5
; GISEL-NEXT: s_mov_b32 s13, s6
; GISEL-NEXT: s_mov_b32 s14, s7
; GISEL-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
; GISEL-NEXT: s_mov_b32 s15, s8
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: buffer_store_b64 v[0:1], v2, s[12:15], 0 offen
; GISEL-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(7) %p, i32 0
  %ld = load i64, ptr addrspace(7) %gep, align 4

  %gep2 = getelementptr i8, ptr addrspace(7) %p2, i32 0
  store i64 %ld, ptr addrspace(7) %gep2, align 4
  ret void
}

; Check that in strict OOB mode for buffers (relaxed-buffer-oob-mode attribute not set) the naturally aligned loads and stores do not get split.
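; Here the accesses carry align 8, the natural alignment of i64, so a single 8-byte
; buffer access is sufficient even in strict OOB mode; the checks below expect one
; buffer_load_b64/buffer_store_b64 pair rather than split dword operations.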

define amdgpu_ps void @do_not_split_aligned_load(ptr addrspace(7) inreg %p, ptr addrspace(7) inreg %p2) #0 {
; SDAG-LABEL: do_not_split_aligned_load:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: v_mov_b32_e32 v0, s4
; SDAG-NEXT: v_mov_b32_e32 v2, s9
; SDAG-NEXT: s_mov_b32 s15, s8
; SDAG-NEXT: s_mov_b32 s14, s7
; SDAG-NEXT: s_mov_b32 s13, s6
; SDAG-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
; SDAG-NEXT: s_mov_b32 s12, s5
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: buffer_store_b64 v[0:1], v2, s[12:15], 0 offen
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: do_not_split_aligned_load:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v2, s9
; GISEL-NEXT: s_mov_b32 s12, s5
; GISEL-NEXT: s_mov_b32 s13, s6
; GISEL-NEXT: s_mov_b32 s14, s7
; GISEL-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
; GISEL-NEXT: s_mov_b32 s15, s8
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: buffer_store_b64 v[0:1], v2, s[12:15], 0 offen
; GISEL-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(7) %p, i32 0
  %ld = load i64, ptr addrspace(7) %gep, align 8

  %gep2 = getelementptr i8, ptr addrspace(7) %p2, i32 0
  store i64 %ld, ptr addrspace(7) %gep2, align 8
  ret void
}