; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=sroa,amdgpu-promote-alloca < %s | FileCheck %s

; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly.
; Strictly, the promote-alloca pass shouldn't have to deal with this case as it is non-canonical, but
; the pass should handle it gracefully if it is encountered.
; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt
; should now leave these unchanged.

; Aggregate types and external address-space-1 globals used by the tests below.
%Block = type { [1 x float], i32 }
%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
%struct = type { i32, i32 }

@block = external addrspace(1) global %Block
@pv = external addrspace(1) global %gl_PerVertex

; A [1 x float] alloca is filled by a whole-aggregate store and then read
; through a GEP whose index is loaded from @block. The checks expect the
; [1 x float] alloca to remain in the output.
define amdgpu_vs void @promote_1d_aggr() #0 {
; CHECK-LABEL: @promote_1d_aggr(
; CHECK-NEXT:    [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], ptr addrspace(1) @block, i32 0, i32 1
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = load [1 x float], ptr addrspace(1) @block, align 4
; CHECK-NEXT:    [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [1 x float] [[FOO3]], 0
; CHECK-NEXT:    [[FOO3_FCA_0_GEP:%.*]] = getelementptr inbounds [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT:    store float [[FOO3_FCA_0_EXTRACT]], ptr addrspace(5) [[FOO3_FCA_0_GEP]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO1]]
; CHECK-NEXT:    [[FOO6:%.*]] = load float, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> undef, float [[FOO6]], i32 0
; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[FOO6]], i32 3
; CHECK-NEXT:    store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [1 x float], addrspace(5)
  %foo = getelementptr %Block, ptr addrspace(1) @block, i32 0, i32 1
  %foo1 = load i32, ptr addrspace(1) %foo
  store i32 %foo1, ptr addrspace(5) %i
  %foo3 = load [1 x float], ptr addrspace(1) @block
  store [1 x float] %foo3, ptr addrspace(5) %f1
  %foo4 = load i32, ptr addrspace(5) %i
  %foo5 = getelementptr [1 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  %foo6 = load float, ptr addrspace(5) %foo5
  %foo7 = alloca <4 x float>, addrspace(5)
  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  store <4 x float> %foo12, ptr addrspace(1) @pv
  ret void
}

%Block2 = type { i32, [2 x float] }
@block2 = external addrspace(1) global %Block2

; A [2 x float] alloca is written one element at a time, then loaded as a
; whole aggregate and stored to @block2. The checks expect no alloca in the
; output; the aggregate is rebuilt with insertvalue.
define amdgpu_vs void @promote_store_aggr() #0 {
; CHECK-LABEL: @promote_store_aggr(
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) @block2, align 4
; CHECK-NEXT:    [[FOO3:%.*]] = sitofp i32 [[FOO1]] to float
; CHECK-NEXT:    [[FOO6_FCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[FOO3]], 0
; CHECK-NEXT:    [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1
; CHECK-NEXT:    [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1
; CHECK-NEXT:    store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4
; CHECK-NEXT:    store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [2 x float], addrspace(5)
  %foo1 = load i32, ptr addrspace(1) @block2
  store i32 %foo1, ptr addrspace(5) %i
  %foo2 = load i32, ptr addrspace(5) %i
  %foo3 = sitofp i32 %foo2 to float
  store float %foo3, ptr addrspace(5) %f1
  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 2.000000e+00, ptr addrspace(5) %foo5
  %foo6 = load [2 x float], ptr addrspace(5) %f1
  %foo7 = getelementptr %Block2, ptr addrspace(1) @block2, i32 0, i32 1
  store [2 x float] %foo6, ptr addrspace(1) %foo7
  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv
  ret void
}

%Block3 = type { [2 x float], i32 }
@block3 = external addrspace(1) global %Block3

; A [2 x float] alloca is filled by a whole-aggregate store and then read
; through a GEP whose index is loaded from @block3. The checks expect the
; alloca to be replaced by a <2 x float> value that is read with an
; extractelement using the loaded index.
define amdgpu_vs void @promote_load_from_store_aggr() #0 {
; CHECK-LABEL: @promote_load_from_store_aggr(
; CHECK-NEXT:    [[F1:%.*]] = freeze <2 x float> poison
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 1
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = load [2 x float], ptr addrspace(1) @block3, align 4
; CHECK-NEXT:    [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> [[F1]], float [[FOO3_FCA_0_EXTRACT]], i32 0
; CHECK-NEXT:    [[FOO3_FCA_1_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO1]]
; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[TMP3]], i32 3
; CHECK-NEXT:    store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [2 x float], addrspace(5)
  %foo = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 1
  %foo1 = load i32, ptr addrspace(1) %foo
  store i32 %foo1, ptr addrspace(5) %i
  %foo3 = load [2 x float], ptr addrspace(1) @block3
  store [2 x float] %foo3, ptr addrspace(5) %f1
  %foo4 = load i32, ptr addrspace(5) %i
  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  %foo6 = load float, ptr addrspace(5) %foo5
  %foo7 = alloca <4 x float>, addrspace(5)
  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  store <4 x float> %foo12, ptr addrspace(1) @pv
  ret void
}

%Block4 = type { [2 x i32], i32 }
@block4 = external addrspace(1) global %Block4
%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
@pv1 = external addrspace(1) global %gl_PV

; This should not crash on an aliased variable offset that can be
; optimized out (variable %aliasTofoo3 in the test).
; The value stored to element 2 of %f1 is reloaded as %aliasTofoo3 and then
; used as the GEP index into %f1. The checks expect the reload to be replaced
; by the originally loaded value, which then indexes a <3 x i32> vector.
define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) {
; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
; CHECK-NEXT:    [[F1:%.*]] = freeze <3 x i32> poison
; CHECK-NEXT:    [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> [[F1]], i32 [[FOO3_UNPACK2]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x i32> [[INPUT:%.*]], i32 [[TMP2]], i64 3
; CHECK-NEXT:    store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
; CHECK-NEXT:    ret void
;
  %f1 = alloca [3 x i32], align 4, addrspace(5)
  %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
  %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
  store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
  %aliasTofoo3 = load i32, ptr addrspace(5) %G1, align 4
  %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %aliasTofoo3
  %foo6 = load i32, ptr addrspace(5) %foo5, align 4
  %foo12 = insertelement <4 x i32> %input, i32 %foo6, i64 3
  store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
  ret void
}

; A 16-byte memmove within a single [5 x float] alloca, from element 1 to
; element 0. The checks expect the move to become a shufflevector on a
; <5 x float> value and the final load of element 0 to fold to 1.0.
define amdgpu_vs void @promote_memmove_aggr() #0 {
; CHECK-LABEL: @promote_memmove_aggr(
; CHECK-NEXT:    [[F1:%.*]] = freeze <5 x float> poison
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <5 x float> [[F1]], float 0.000000e+00, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <5 x float> [[TMP1]], float 0.000000e+00, i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <5 x float> [[TMP2]], float 0.000000e+00, i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <5 x float> [[TMP3]], float 0.000000e+00, i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <5 x float> [[TMP4]], float 0.000000e+00, i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <5 x float> [[TMP5]], float 1.000000e+00, i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <5 x float> [[TMP6]], float 2.000000e+00, i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <5 x float> [[TMP7]], <5 x float> poison, <5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 4>
; CHECK-NEXT:    store float 1.000000e+00, ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 1.0, ptr addrspace(5) %foo1
  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2
  call void @llvm.memmove.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo1, i32 16, i1 false)
  %foo3 = load float, ptr addrspace(5) %f1
  store float %foo3, ptr addrspace(1) @pv
  ret void
}

; An 8-byte memcpy within a single [5 x float] alloca, from element 3 to
; element 0, preceded by a store through a GEP whose index is loaded from
; @block3. The checks expect the copy to become a shufflevector on a
; <5 x float> value, followed by an extractelement of lane 0.
define amdgpu_vs void @promote_memcpy_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_aggr(
; CHECK-NEXT:    [[F1:%.*]] = freeze <5 x float> poison
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <5 x float> [[F1]], float 0.000000e+00, i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <5 x float> [[TMP7]], float 0.000000e+00, i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <5 x float> [[TMP8]], float 0.000000e+00, i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <5 x float> [[TMP9]], float 0.000000e+00, i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <5 x float> [[TMP4]], float 0.000000e+00, i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <5 x float> [[TMP5]], float 2.000000e+00, i32 3
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <5 x float> [[TMP6]], float 3.000000e+00, i32 [[FOO4]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
; CHECK-NEXT:    store float [[TMP3]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2

  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
  %foo6 = load float, ptr addrspace(5) %f1
  store float %foo6, ptr addrspace(1) @pv
  ret void
}

; A 20-byte memcpy whose source and destination are the same [5 x float]
; alloca. The checks expect the function to reduce to a single store of 0.0
; to @pv.
define amdgpu_vs void @promote_memcpy_identity_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_identity_aggr(
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 1.0, ptr addrspace(5) %foo1
  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2
  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %f1, i32 20, i1 false)
  %foo3 = load float, ptr addrspace(5) %f1
  store float %foo3, ptr addrspace(1) @pv
  ret void
}

; TODO: Promote the allocas even if there is a memcpy between different allocas.
; An 8-byte memcpy from one [5 x float] alloca to another. The checks
; currently expect both allocas and the memcpy call to remain.
define amdgpu_vs void @promote_memcpy_two_aggrs() #0 {
; CHECK-LABEL: @promote_memcpy_two_aggrs(
; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[F2:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[DOTFCA_0_GEP1:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP1]], align 4
; CHECK-NEXT:    [[DOTFCA_1_GEP2:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP2]], align 4
; CHECK-NEXT:    [[DOTFCA_2_GEP3:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP3]], align 4
; CHECK-NEXT:    [[DOTFCA_3_GEP4:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP4]], align 4
; CHECK-NEXT:    [[DOTFCA_4_GEP5:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP5]], align 4
; CHECK-NEXT:    [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 0
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 1
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 2
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 3
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 4
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT:    call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 [[F2]], ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
; CHECK-NEXT:    [[FOO6:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    [[FOO7:%.*]] = load float, ptr addrspace(5) [[FOO6]], align 4
; CHECK-NEXT:    store float [[FOO7]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  %f2 = alloca [5 x float], addrspace(5)

  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  store [5 x float] zeroinitializer, ptr addrspace(5) %f2

  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f2, ptr addrspace(5) align 4 %f1, i32 8, i1 false)

  %foo6 = getelementptr [5 x float], ptr addrspace(5) %f2, i32 0, i32 %foo4
  %foo7 = load float, ptr addrspace(5) %foo6
  store float %foo7, ptr addrspace(1) @pv
  ret void
}

; TODO: Promote the alloca even if there is a memcpy between the alloca and another memory space.
; An 8-byte memcpy from a [5 x float] addrspace(5) alloca to the
; addrspace(1) global @pv. The checks currently expect the alloca and the
; memcpy call to remain.
define amdgpu_vs void @promote_memcpy_p1p5_aggr(ptr addrspace(1) inreg %src) #0 {
; CHECK-LABEL: @promote_memcpy_p1p5_aggr(
; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT:    call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 %f1, i32 8, i1 false)
  ret void
}

; An 8-byte llvm.memcpy.inline within a single [5 x float] alloca, from
; element 3 to element 0, preceded by a store through a GEP whose index is
; loaded from @block3. The checks expect the copy to become a shufflevector
; on a <5 x float> value, followed by an extractelement of lane 0.
define amdgpu_vs void @promote_memcpy_inline_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_inline_aggr(
; CHECK-NEXT:    [[F1:%.*]] = freeze <5 x float> poison
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <5 x float> [[F1]], float 0.000000e+00, i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <5 x float> [[TMP6]], float 0.000000e+00, i32 1
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <5 x float> [[TMP7]], float 0.000000e+00, i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <5 x float> [[TMP8]], float 0.000000e+00, i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <5 x float> [[TMP4]], float 0.000000e+00, i32 4
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <5 x float> [[TMP5]], float 3.000000e+00, i32 [[FOO4]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
; CHECK-NEXT:    store float [[TMP3]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
  %foo6 = load float, ptr addrspace(5) %f1
  store float %foo6, ptr addrspace(1) @pv
  ret void
}

; Memory-transfer intrinsics called by the memcpy/memmove tests above.
declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)

@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
@frag_color = external addrspace(1) global <4 x float>

; A [2 x double] alloca is filled by a whole-aggregate store, then read and
; written one element at a time. The checks expect no alloca in the output;
; the element reads become extractvalue on the stored aggregate.
define amdgpu_ps void @promote_double_aggr() #0 {
; CHECK-LABEL: @promote_double_aggr(
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
; CHECK-NEXT:    [[FOO1:%.*]] = load double, ptr addrspace(1) [[FOO]], align 8
; CHECK-NEXT:    [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
; CHECK-NEXT:    [[FOO3:%.*]] = load double, ptr addrspace(1) [[FOO2]], align 8
; CHECK-NEXT:    [[FOO4:%.*]] = insertvalue [2 x double] poison, double [[FOO1]], 0
; CHECK-NEXT:    [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
; CHECK-NEXT:    [[FOO5_FCA_0_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 0
; CHECK-NEXT:    [[FOO5_FCA_1_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 1
; CHECK-NEXT:    [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]]
; CHECK-NEXT:    [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]]
; CHECK-NEXT:    [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
; CHECK-NEXT:    [[FOO18:%.*]] = insertelement <4 x float> poison, float [[FOO17]], i32 0
; CHECK-NEXT:    [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1
; CHECK-NEXT:    [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2
; CHECK-NEXT:    [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3
; CHECK-NEXT:    store <4 x float> [[FOO21]], ptr addrspace(1) @frag_color, align 16
; CHECK-NEXT:    ret void
;
  %s = alloca [2 x double], addrspace(5)
  %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
  %foo1 = load double, ptr addrspace(1) %foo
  %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
  %foo3 = load double, ptr addrspace(1) %foo2
  %foo4 = insertvalue [2 x double] poison, double %foo1, 0
  %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
  store [2 x double] %foo5, ptr addrspace(5) %s
  %foo6 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo7 = load double, ptr addrspace(5) %foo6
  %foo8 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo9 = load double, ptr addrspace(5) %foo8
  %foo10 = fadd double %foo7, %foo9
  store double %foo10, ptr addrspace(5) %s
  %foo13 = load double, ptr addrspace(5) %s
  %foo14 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo15 = load double, ptr addrspace(5) %foo14
  %foo16 = fadd double %foo13, %foo15
  %foo17 = fptrunc double %foo16 to float
  %foo18 = insertelement <4 x float> poison, float %foo17, i32 0
  %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1
  %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2
  %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3
  store <4 x float> %foo21, ptr addrspace(1) @frag_color
  ret void
}

; Don't crash on a type that isn't a valid vector element.
; The alloca is an array of %struct and is never used; the checks expect
; only the entry label and the return to remain.
define amdgpu_kernel void @alloca_struct() #0 {
; CHECK-LABEL: @alloca_struct(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  %alloca = alloca [2 x %struct], align 4, addrspace(5)
  ret void
}