; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
;
; 32-bit SSE tests to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86-SSE,X86-SSE1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE41

; Doubles at indices 2 and 3 are adjacent in memory. The SSE2+/AVX checks
; expect the two scalar loads to become one unaligned 16-byte load at byte
; offset 16; the i686 run with only SSE1 instead shows two x87 loads (fldl).
define <2 x double> @merge_2f64_f64_23(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_2f64_f64_23:
; SSE:       # %bb.0:
; SSE-NEXT:    movups 16(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_2f64_f64_23:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups 16(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_2f64_f64_23:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    fldl 16(%eax)
; X86-SSE1-NEXT:    fldl 24(%eax)
; X86-SSE1-NEXT:    fxch %st(1)
; X86-SSE1-NEXT:    retl
;
; X86-SSE41-LABEL: merge_2f64_f64_23:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movups 16(%eax), %xmm0
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds double, ptr %ptr, i64 2
  %ptr1 = getelementptr inbounds double, ptr %ptr, i64 3
  %val0 = load double, ptr %ptr0
  %val1 = load double, ptr %ptr1
  %res0 = insertelement <2 x double> undef, double %val0, i32 0
  %res1 = insertelement <2 x double> %res0, double %val1, i32 1
  ret <2 x double> %res1
}

; i64 elements 1 and 2 are adjacent. The SSE2+/AVX checks expect one unaligned
; 16-byte load at byte offset 8. The i686 run with only SSE1 copies the data
; as four 32-bit integer moves into memory addressed by %eax.
define <2 x i64> @merge_2i64_i64_12(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_2i64_i64_12:
; SSE:       # %bb.0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_2i64_i64_12:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_2i64_i64_12:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %edi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    pushl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE1-NEXT:    .cfi_offset %esi, -12
; X86-SSE1-NEXT:    .cfi_offset %edi, -8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl 8(%ecx), %edx
; X86-SSE1-NEXT:    movl 12(%ecx), %esi
; X86-SSE1-NEXT:    movl 16(%ecx), %edi
; X86-SSE1-NEXT:    movl 20(%ecx), %ecx
; X86-SSE1-NEXT:    movl %ecx, 12(%eax)
; X86-SSE1-NEXT:    movl %edi, 8(%eax)
; X86-SSE1-NEXT:    movl %esi, 4(%eax)
; X86-SSE1-NEXT:    movl %edx, (%eax)
; X86-SSE1-NEXT:    popl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    popl %edi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: merge_2i64_i64_12:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movups 8(%eax), %xmm0
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, ptr %ptr, i64 1
  %ptr1 = getelementptr inbounds i64, ptr %ptr, i64 2
  %val0 = load i64, ptr %ptr0
  %val1 = load i64, ptr %ptr1
  %res0 = insertelement <2 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 %val1, i32 1
  ret <2 x i64> %res1
}

; Floats 2, 3, 4 and 5 fill all four lanes in order. Every configuration is
; expected to use a single unaligned 16-byte load at byte offset 8.
define <4 x float> @merge_4f32_f32_2345(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_2345:
; SSE:       # %bb.0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_2345:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86-SSE-LABEL: merge_4f32_f32_2345:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movups 8(%eax), %xmm0
; X86-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, ptr %ptr, i64 2
  %ptr1 = getelementptr inbounds float, ptr %ptr, i64 3
  %ptr2 = getelementptr inbounds float, ptr %ptr, i64 4
  %ptr3 = getelementptr inbounds float, ptr %ptr, i64 5
  %val0 = load float, ptr %ptr0
  %val1 = load float, ptr %ptr1
  %val2 = load float, ptr %ptr2
  %val3 = load float, ptr %ptr3
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
  %res3 = insertelement <4 x float> %res2, float %val3, i32 3
  ret <4 x float> %res3
}

; Lane 0 = float 3, lane 1 = 0.0, lanes 2 and 3 are left undef. The checks
; expect a single scalar load whose upper lanes are zero (movss from memory).
define <4 x float> @merge_4f32_f32_3zuu(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_3zuu:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_3zuu:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X86-SSE-LABEL: merge_4f32_f32_3zuu:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, ptr %ptr, i64 3
  %val0 = load float, ptr %ptr0
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  ret <4 x float> %res1
}

; Floats 3 and 4 go into lanes 0 and 1; lanes 2 and 3 stay undef. The checks
; expect one 64-bit load (movsd), or xorps + movlps when only SSE1 is available.
define <4 x float> @merge_4f32_f32_34uu(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_34uu:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_34uu:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4f32_f32_34uu:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    retl
;
; X86-SSE41-LABEL: merge_4f32_f32_34uu:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds float, ptr %ptr, i64 3
  %ptr1 = getelementptr inbounds float, ptr %ptr, i64 4
  %val0 = load float, ptr %ptr0
  %val1 = load float, ptr %ptr1
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  ret <4 x float> %res1
}

; Lanes 0, 1 and 3 take floats 3, 4 and 6; lane 2 keeps the zero from the
; zeroinitializer base vector. The checks expect a 16-byte load at byte
; offset 12 with lane 2 then cleared, via two shufps (SSE1/SSE2) or a blend
; against a zeroed register (SSE4.1/AVX).
define <4 x float> @merge_4f32_f32_34z6(ptr %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_34z6:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movups 12(%rdi), %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: merge_4f32_f32_34z6:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movups 12(%rdi), %xmm1
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_34z6:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2],mem[3]
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4f32_f32_34z6:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movups 12(%eax), %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X86-SSE1-NEXT:    retl
;
; X86-SSE41-LABEL: merge_4f32_f32_34z6:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movups 12(%eax), %xmm1
; X86-SSE41-NEXT:    xorps %xmm0, %xmm0
; X86-SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds float, ptr %ptr, i64 3
  %ptr1 = getelementptr inbounds float, ptr %ptr, i64 4
  %ptr3 = getelementptr inbounds float, ptr %ptr, i64 6
  %val0 = load float, ptr %ptr0
  %val1 = load float, ptr %ptr1
  %val3 = load float, ptr %ptr3
  %res0 = insertelement <4 x float> zeroinitializer, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res3 = insertelement <4 x float> %res1, float %val3, i32 3
  ret <4 x float> %res3
}

; Floats 4 and 5 are inserted into lanes 0 and 1 of an all-zero vector, so
; lanes 2 and 3 must be zero. The checks expect one 64-bit load that zeroes
; the upper half (movsd), or xorps + movlps when only SSE1 is available.
define <4 x float> @merge_4f32_f32_45zz(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_45zz:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_45zz:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4f32_f32_45zz:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    retl
;
; X86-SSE41-LABEL: merge_4f32_f32_45zz:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds float, ptr %ptr, i64 4
  %ptr1 = getelementptr inbounds float, ptr %ptr, i64 5
  %val0 = load float, ptr %ptr0
  %val1 = load float, ptr %ptr1
  %res0 = insertelement <4 x float> zeroinitializer, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  ret <4 x float> %res1
}

; Floats 0, 1 and 2 fill lanes 0..2; lane 3 is explicitly undef. The recorded
; output is not a single 16-byte load: it is a 64-bit load for lanes 0 and 1
; plus a separate scalar load for lane 2 (movlhps or insertps).
define <4 x float> @merge_4f32_f32_012u(ptr %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_012u:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: merge_4f32_f32_012u:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_012u:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4f32_f32_012u:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE1-NEXT:    retl
;
; X86-SSE41-LABEL: merge_4f32_f32_012u:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X86-SSE41-NEXT:    retl
  %ptr1 = getelementptr inbounds float, ptr %ptr, i64 1
  %ptr2 = getelementptr inbounds float, ptr %ptr, i64 2
  %val0 = load float, ptr %ptr
  %val1 = load float, ptr %ptr1
  %val2 = load float, ptr %ptr2
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
  %res3 = insertelement <4 x float> %res2, float undef, i32 3
  ret <4 x float> %res3
}

; Lanes 0 and 1 take floats 0 and 1, lane 2 takes float 9 (not adjacent to
; the first pair), lane 3 is explicitly undef. The checks expect a 64-bit load
; for the adjacent pair plus a separate scalar load for lane 2.
define <4 x float> @merge_4f32_f32_019u(ptr %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_019u:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: merge_4f32_f32_019u:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_019u:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4f32_f32_019u:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE1-NEXT:    retl
;
; X86-SSE41-LABEL: merge_4f32_f32_019u:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X86-SSE41-NEXT:    retl
  %ptr1 = getelementptr inbounds float, ptr %ptr, i64 1
  %ptr2 = getelementptr inbounds float, ptr %ptr, i64 9
  %val0 = load float, ptr %ptr
  %val1 = load float, ptr %ptr1
  %val2 = load float, ptr %ptr2
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
  %res3 = insertelement <4 x float> %res2, float undef, i32 3
  ret <4 x float> %res3
}

; Lanes 0, 1 and 3 take i32 elements 2, 3 and 5; lane 2 is never written and
; stays undef. The SSE2+/AVX checks expect a single unaligned 16-byte load at
; byte offset 8; the i686 run with only SSE1 uses scalar 32-bit moves.
define <4 x i32> @merge_4i32_i32_23u5(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_23u5:
; SSE:       # %bb.0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_23u5:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4i32_i32_23u5:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    .cfi_offset %esi, -8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl 8(%ecx), %edx
; X86-SSE1-NEXT:    movl 12(%ecx), %esi
; X86-SSE1-NEXT:    movl 20(%ecx), %ecx
; X86-SSE1-NEXT:    movl %esi, 4(%eax)
; X86-SSE1-NEXT:    movl %edx, (%eax)
; X86-SSE1-NEXT:    movl %ecx, 12(%eax)
; X86-SSE1-NEXT:    popl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: merge_4i32_i32_23u5:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movups 8(%eax), %xmm0
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 5
  %val0 = load i32, ptr %ptr0
  %val1 = load i32, ptr %ptr1
  %val3 = load i32, ptr %ptr3
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
  ret <4 x i32> %res3
}

; Same lane layout as the plain 23u5 case (elements 2, 3, undef, 5), but
; element 2 is incremented and stored back between the first load and the
; remaining loads. The SSE2+/AVX checks expect the merged 16-byte load to be
; followed by the in-memory increment of element 2 (incl at byte offset 8).
define <4 x i32> @merge_4i32_i32_23u5_inc2(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_23u5_inc2:
; SSE:       # %bb.0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    incl 8(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_23u5_inc2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    incl 8(%rdi)
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4i32_i32_23u5_inc2:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %edi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    pushl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE1-NEXT:    .cfi_offset %esi, -12
; X86-SSE1-NEXT:    .cfi_offset %edi, -8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl 8(%ecx), %edx
; X86-SSE1-NEXT:    movl 12(%ecx), %esi
; X86-SSE1-NEXT:    leal 1(%edx), %edi
; X86-SSE1-NEXT:    movl %edi, 8(%ecx)
; X86-SSE1-NEXT:    movl 20(%ecx), %ecx
; X86-SSE1-NEXT:    movl %esi, 4(%eax)
; X86-SSE1-NEXT:    movl %edx, (%eax)
; X86-SSE1-NEXT:    movl %ecx, 12(%eax)
; X86-SSE1-NEXT:    popl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    popl %edi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: merge_4i32_i32_23u5_inc2:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movups 8(%eax), %xmm0
; X86-SSE41-NEXT:    incl 8(%eax)
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 5
  %val0 = load i32, ptr %ptr0
  %inc = add i32 %val0, 1
  store i32 %inc, ptr %ptr0
  %val1 = load i32, ptr %ptr1
  %val3 = load i32, ptr %ptr3
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
  ret <4 x i32> %res3
}

; Same lane layout as the plain 23u5 case (elements 2, 3, undef, 5), but
; element 3 is incremented and stored back before element 5 is loaded. The
; SSE2+/AVX checks expect the merged 16-byte load to be followed by the
; in-memory increment of element 3 (incl at byte offset 12).
define <4 x i32> @merge_4i32_i32_23u5_inc3(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_23u5_inc3:
; SSE:       # %bb.0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    incl 12(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_23u5_inc3:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    incl 12(%rdi)
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4i32_i32_23u5_inc3:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %edi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    pushl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE1-NEXT:    .cfi_offset %esi, -12
; X86-SSE1-NEXT:    .cfi_offset %edi, -8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl 8(%ecx), %edx
; X86-SSE1-NEXT:    movl 12(%ecx), %esi
; X86-SSE1-NEXT:    leal 1(%esi), %edi
; X86-SSE1-NEXT:    movl %edi, 12(%ecx)
; X86-SSE1-NEXT:    movl 20(%ecx), %ecx
; X86-SSE1-NEXT:    movl %esi, 4(%eax)
; X86-SSE1-NEXT:    movl %edx, (%eax)
; X86-SSE1-NEXT:    movl %ecx, 12(%eax)
; X86-SSE1-NEXT:    popl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    popl %edi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: merge_4i32_i32_23u5_inc3:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movups 8(%eax), %xmm0
; X86-SSE41-NEXT:    incl 12(%eax)
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 5
  %val0 = load i32, ptr %ptr0
  %val1 = load i32, ptr %ptr1
  %inc = add i32 %val1, 1
  store i32 %inc, ptr %ptr1
  %val3 = load i32, ptr %ptr3
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
  ret <4 x i32> %res3
}

; Lane 0 = i32 element 3, lane 1 = 0, lanes 2 and 3 are left undef. The
; SSE2+/AVX checks expect one scalar load with zeroed upper lanes (movss);
; the i686 run with only SSE1 stores the element and a literal zero through
; %eax using integer moves.
define <4 x i32> @merge_4i32_i32_3zuu(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_3zuu:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_3zuu:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4i32_i32_3zuu:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl 12(%ecx), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%eax)
; X86-SSE1-NEXT:    movl $0, 4(%eax)
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: merge_4i32_i32_3zuu:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 3
  %val0 = load i32, ptr %ptr0
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
  ret <4 x i32> %res1
}

; i32 elements 3 and 4 go into lanes 0 and 1; lanes 2 and 3 stay undef. The
; SSE2+/AVX checks expect one 64-bit load (movsd); the i686 run with only
; SSE1 copies the two elements with scalar 32-bit moves.
define <4 x i32> @merge_4i32_i32_34uu(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_34uu:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_34uu:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4i32_i32_34uu:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl 12(%ecx), %edx
; X86-SSE1-NEXT:    movl 16(%ecx), %ecx
; X86-SSE1-NEXT:    movl %ecx, 4(%eax)
; X86-SSE1-NEXT:    movl %edx, (%eax)
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: merge_4i32_i32_34uu:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 3
  %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 4
  %val0 = load i32, ptr %ptr0
  %val1 = load i32, ptr %ptr1
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  ret <4 x i32> %res1
}

; i32 elements 4 and 5 are inserted into lanes 0 and 1 of an all-zero vector,
; so lanes 2 and 3 must be zero. The SSE2+/AVX checks expect one 64-bit load
; that zeroes the upper half (movsd); the i686 run with only SSE1 stores the
; two elements and two literal zeros with scalar moves.
define <4 x i32> @merge_4i32_i32_45zz(ptr %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_45zz:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_45zz:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: merge_4i32_i32_45zz:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl 16(%ecx), %edx
; X86-SSE1-NEXT:    movl 20(%ecx), %ecx
; X86-SSE1-NEXT:    movl %ecx, 4(%eax)
; X86-SSE1-NEXT:    movl %edx, (%eax)
; X86-SSE1-NEXT:    movl $0, 12(%eax)
; X86-SSE1-NEXT:    movl $0, 8(%eax)
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: merge_4i32_i32_45zz:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE41-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 4
  %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 5
  %val0 = load i32, ptr %ptr0
  %val1 = load i32, ptr %ptr1
  %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  ret <4 x i32> %res1
}

; Variant of merge_4i32_i32_45zz in which element 4 is incremented and stored
; back to memory between the load of element 4 and the load of element 5.
; The SSE, AVX and X86-SSE41 checks still expect one movsd/vmovsd for the
; vector, followed by an in-memory incl at byte offset 16; the X86-SSE1 checks
; expect scalar loads, a leal-based increment and scalar stores.
Nikita Popov2f448bf2022-06-22 14:33:12 +0200619define <4 x i32> @merge_4i32_i32_45zz_inc4(ptr %ptr) nounwind uwtable noinline ssp {
Artur Pilipenko7b152542017-10-05 16:28:21 +0000620; SSE-LABEL: merge_4i32_i32_45zz_inc4:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000621; SSE: # %bb.0:
Artur Pilipenko7b152542017-10-05 16:28:21 +0000622; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
623; SSE-NEXT: incl 16(%rdi)
624; SSE-NEXT: retq
625;
626; AVX-LABEL: merge_4i32_i32_45zz_inc4:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000627; AVX: # %bb.0:
Artur Pilipenko7b152542017-10-05 16:28:21 +0000628; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
629; AVX-NEXT: incl 16(%rdi)
630; AVX-NEXT: retq
631;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000632; X86-SSE1-LABEL: merge_4i32_i32_45zz_inc4:
633; X86-SSE1: # %bb.0:
634; X86-SSE1-NEXT: pushl %edi
635; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
636; X86-SSE1-NEXT: pushl %esi
637; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
638; X86-SSE1-NEXT: .cfi_offset %esi, -12
639; X86-SSE1-NEXT: .cfi_offset %edi, -8
640; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
641; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
642; X86-SSE1-NEXT: movl 16(%ecx), %edx
643; X86-SSE1-NEXT: movl 20(%ecx), %esi
644; X86-SSE1-NEXT: leal 1(%edx), %edi
645; X86-SSE1-NEXT: movl %edi, 16(%ecx)
646; X86-SSE1-NEXT: movl %esi, 4(%eax)
647; X86-SSE1-NEXT: movl %edx, (%eax)
648; X86-SSE1-NEXT: movl $0, 12(%eax)
649; X86-SSE1-NEXT: movl $0, 8(%eax)
650; X86-SSE1-NEXT: popl %esi
651; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
652; X86-SSE1-NEXT: popl %edi
653; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
654; X86-SSE1-NEXT: retl $4
Artur Pilipenko7b152542017-10-05 16:28:21 +0000655;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000656; X86-SSE41-LABEL: merge_4i32_i32_45zz_inc4:
657; X86-SSE41: # %bb.0:
658; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
659; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
660; X86-SSE41-NEXT: incl 16(%eax)
661; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +0200662 %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 4
663 %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 5
664 %val0 = load i32, ptr %ptr0
Artur Pilipenko7b152542017-10-05 16:28:21 +0000665 %inc = add i32 %val0, 1
Nikita Popov2f448bf2022-06-22 14:33:12 +0200666 store i32 %inc, ptr %ptr0
667 %val1 = load i32, ptr %ptr1
Artur Pilipenko7b152542017-10-05 16:28:21 +0000668 %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
669 %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
670 ret <4 x i32> %res1
671}
672
; Variant of merge_4i32_i32_45zz in which element 5 is incremented and stored
; back to memory after both elements have been loaded.
; The SSE, AVX and X86-SSE41 checks expect one movsd/vmovsd for the vector,
; followed by an in-memory incl at byte offset 20; the X86-SSE1 checks expect
; scalar loads, a leal-based increment and scalar stores.
Nikita Popov2f448bf2022-06-22 14:33:12 +0200673define <4 x i32> @merge_4i32_i32_45zz_inc5(ptr %ptr) nounwind uwtable noinline ssp {
Artur Pilipenko7b152542017-10-05 16:28:21 +0000674; SSE-LABEL: merge_4i32_i32_45zz_inc5:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000675; SSE: # %bb.0:
Artur Pilipenko7b152542017-10-05 16:28:21 +0000676; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
677; SSE-NEXT: incl 20(%rdi)
678; SSE-NEXT: retq
679;
680; AVX-LABEL: merge_4i32_i32_45zz_inc5:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000681; AVX: # %bb.0:
Artur Pilipenko7b152542017-10-05 16:28:21 +0000682; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
683; AVX-NEXT: incl 20(%rdi)
684; AVX-NEXT: retq
685;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000686; X86-SSE1-LABEL: merge_4i32_i32_45zz_inc5:
687; X86-SSE1: # %bb.0:
688; X86-SSE1-NEXT: pushl %edi
689; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
690; X86-SSE1-NEXT: pushl %esi
691; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
692; X86-SSE1-NEXT: .cfi_offset %esi, -12
693; X86-SSE1-NEXT: .cfi_offset %edi, -8
694; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
695; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
696; X86-SSE1-NEXT: movl 16(%ecx), %edx
697; X86-SSE1-NEXT: movl 20(%ecx), %esi
698; X86-SSE1-NEXT: leal 1(%esi), %edi
699; X86-SSE1-NEXT: movl %edi, 20(%ecx)
700; X86-SSE1-NEXT: movl %esi, 4(%eax)
701; X86-SSE1-NEXT: movl %edx, (%eax)
702; X86-SSE1-NEXT: movl $0, 12(%eax)
703; X86-SSE1-NEXT: movl $0, 8(%eax)
704; X86-SSE1-NEXT: popl %esi
705; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
706; X86-SSE1-NEXT: popl %edi
707; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
708; X86-SSE1-NEXT: retl $4
Artur Pilipenko7b152542017-10-05 16:28:21 +0000709;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000710; X86-SSE41-LABEL: merge_4i32_i32_45zz_inc5:
711; X86-SSE41: # %bb.0:
712; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
713; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
714; X86-SSE41-NEXT: incl 20(%eax)
715; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +0200716 %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 4
717 %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 5
718 %val0 = load i32, ptr %ptr0
719 %val1 = load i32, ptr %ptr1
Artur Pilipenko7b152542017-10-05 16:28:21 +0000720 %inc = add i32 %val1, 1
Nikita Popov2f448bf2022-06-22 14:33:12 +0200721 store i32 %inc, ptr %ptr1
Artur Pilipenko7b152542017-10-05 16:28:21 +0000722 %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
723 %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
724 ret <4 x i32> %res1
725}
726
; Loads the i16 values at element offsets 2, 3, 5, 6, 7 and 9 of %ptr into
; lanes 0, 1, 3, 4, 5 and 7 of an undef <8 x i16>; lanes 2 and 6 are never
; written and stay undef.
; The SSE, AVX and X86-SSE41 checks expect one movups/vmovups from byte offset
; 4; the X86-SSE1 checks expect scalar loads and stores only.
Nikita Popov2f448bf2022-06-22 14:33:12 +0200727define <8 x i16> @merge_8i16_i16_23u567u9(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrimcff85502016-01-15 09:52:50 +0000728; SSE-LABEL: merge_8i16_i16_23u567u9:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000729; SSE: # %bb.0:
Simon Pilgrimcff85502016-01-15 09:52:50 +0000730; SSE-NEXT: movups 4(%rdi), %xmm0
731; SSE-NEXT: retq
732;
733; AVX-LABEL: merge_8i16_i16_23u567u9:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000734; AVX: # %bb.0:
Simon Pilgrimcff85502016-01-15 09:52:50 +0000735; AVX-NEXT: vmovups 4(%rdi), %xmm0
736; AVX-NEXT: retq
Simon Pilgrim528e94e2016-02-04 15:51:55 +0000737;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000738; X86-SSE1-LABEL: merge_8i16_i16_23u567u9:
739; X86-SSE1: # %bb.0:
740; X86-SSE1-NEXT: pushl %edi
741; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
742; X86-SSE1-NEXT: pushl %esi
743; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
744; X86-SSE1-NEXT: .cfi_offset %esi, -12
745; X86-SSE1-NEXT: .cfi_offset %edi, -8
746; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
747; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
748; X86-SSE1-NEXT: movl 4(%ecx), %edx
749; X86-SSE1-NEXT: movl 10(%ecx), %esi
750; X86-SSE1-NEXT: movzwl 14(%ecx), %edi
751; X86-SSE1-NEXT: movzwl 18(%ecx), %ecx
752; X86-SSE1-NEXT: movw %di, 10(%eax)
753; X86-SSE1-NEXT: movw %cx, 14(%eax)
754; X86-SSE1-NEXT: movl %esi, 6(%eax)
755; X86-SSE1-NEXT: movl %edx, (%eax)
756; X86-SSE1-NEXT: popl %esi
757; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
758; X86-SSE1-NEXT: popl %edi
759; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
760; X86-SSE1-NEXT: retl $4
Simon Pilgrimab7c46e2016-08-18 13:41:26 +0000761;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000762; X86-SSE41-LABEL: merge_8i16_i16_23u567u9:
763; X86-SSE41: # %bb.0:
764; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
765; X86-SSE41-NEXT: movups 4(%eax), %xmm0
766; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +0200767 %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 2
768 %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 3
769 %ptr3 = getelementptr inbounds i16, ptr %ptr, i64 5
770 %ptr4 = getelementptr inbounds i16, ptr %ptr, i64 6
771 %ptr5 = getelementptr inbounds i16, ptr %ptr, i64 7
772 %ptr7 = getelementptr inbounds i16, ptr %ptr, i64 9
773 %val0 = load i16, ptr %ptr0
774 %val1 = load i16, ptr %ptr1
775 %val3 = load i16, ptr %ptr3
776 %val4 = load i16, ptr %ptr4
777 %val5 = load i16, ptr %ptr5
778 %val7 = load i16, ptr %ptr7
Simon Pilgrimcff85502016-01-15 09:52:50 +0000779 %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
780 %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
781 %res3 = insertelement <8 x i16> %res1, i16 %val3, i32 3
782 %res4 = insertelement <8 x i16> %res3, i16 %val4, i32 4
783 %res5 = insertelement <8 x i16> %res4, i16 %val5, i32 5
784 %res7 = insertelement <8 x i16> %res5, i16 %val7, i32 7
785 ret <8 x i16> %res7
786}
787
; Loads the i16 values at element offsets 3 and 4 of %ptr into lanes 0 and 1 of
; an undef <8 x i16>; the remaining six lanes stay undef.
; The SSE, AVX and X86-SSE41 checks expect a single movss/vmovss; the X86-SSE1
; checks expect one 32-bit movl from byte offset 6 copied into the result
; buffer.
Nikita Popov2f448bf2022-06-22 14:33:12 +0200788define <8 x i16> @merge_8i16_i16_34uuuuuu(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrimcff85502016-01-15 09:52:50 +0000789; SSE-LABEL: merge_8i16_i16_34uuuuuu:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000790; SSE: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +0000791; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
Simon Pilgrimcff85502016-01-15 09:52:50 +0000792; SSE-NEXT: retq
793;
794; AVX-LABEL: merge_8i16_i16_34uuuuuu:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000795; AVX: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +0000796; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
Simon Pilgrimcff85502016-01-15 09:52:50 +0000797; AVX-NEXT: retq
Simon Pilgrim528e94e2016-02-04 15:51:55 +0000798;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000799; X86-SSE1-LABEL: merge_8i16_i16_34uuuuuu:
800; X86-SSE1: # %bb.0:
801; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
802; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
803; X86-SSE1-NEXT: movl 6(%ecx), %ecx
804; X86-SSE1-NEXT: movl %ecx, (%eax)
805; X86-SSE1-NEXT: retl $4
Simon Pilgrimab7c46e2016-08-18 13:41:26 +0000806;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000807; X86-SSE41-LABEL: merge_8i16_i16_34uuuuuu:
808; X86-SSE41: # %bb.0:
809; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
810; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
811; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +0200812 %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 3
813 %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 4
814 %val0 = load i16, ptr %ptr0
815 %val1 = load i16, ptr %ptr1
Simon Pilgrimcff85502016-01-15 09:52:50 +0000816 %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
817 %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
818 ret <8 x i16> %res1
819}
820
; Loads the i16 values at element offsets 4, 5 and 7 of %ptr into lanes 0, 1
; and 3 of an undef <8 x i16>, leaves lane 2 undef, and inserts constant zero
; into lanes 4 through 7.
; The SSE, AVX and X86-SSE41 checks expect a single movsd/vmovsd whose upper
; half is zero; the X86-SSE1 checks expect scalar copies plus explicit zero
; stores.
Nikita Popov2f448bf2022-06-22 14:33:12 +0200821define <8 x i16> @merge_8i16_i16_45u7zzzz(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrimcff85502016-01-15 09:52:50 +0000822; SSE-LABEL: merge_8i16_i16_45u7zzzz:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000823; SSE: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +0000824; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
Simon Pilgrimcff85502016-01-15 09:52:50 +0000825; SSE-NEXT: retq
826;
827; AVX-LABEL: merge_8i16_i16_45u7zzzz:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000828; AVX: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +0000829; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
Simon Pilgrimcff85502016-01-15 09:52:50 +0000830; AVX-NEXT: retq
Simon Pilgrim528e94e2016-02-04 15:51:55 +0000831;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000832; X86-SSE1-LABEL: merge_8i16_i16_45u7zzzz:
833; X86-SSE1: # %bb.0:
834; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
835; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
836; X86-SSE1-NEXT: movl 8(%ecx), %edx
837; X86-SSE1-NEXT: movzwl 14(%ecx), %ecx
838; X86-SSE1-NEXT: movw %cx, 6(%eax)
839; X86-SSE1-NEXT: movl %edx, (%eax)
840; X86-SSE1-NEXT: movl $0, 12(%eax)
841; X86-SSE1-NEXT: movl $0, 8(%eax)
842; X86-SSE1-NEXT: retl $4
Simon Pilgrimab7c46e2016-08-18 13:41:26 +0000843;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000844; X86-SSE41-LABEL: merge_8i16_i16_45u7zzzz:
845; X86-SSE41: # %bb.0:
846; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
847; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
848; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +0200849 %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 4
850 %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 5
851 %ptr3 = getelementptr inbounds i16, ptr %ptr, i64 7
852 %val0 = load i16, ptr %ptr0
853 %val1 = load i16, ptr %ptr1
854 %val3 = load i16, ptr %ptr3
Simon Pilgrimcff85502016-01-15 09:52:50 +0000855 %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
856 %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
857 %res3 = insertelement <8 x i16> %res1, i16 %val3, i32 3
858 %res4 = insertelement <8 x i16> %res3, i16 0, i32 4
859 %res5 = insertelement <8 x i16> %res4, i16 0, i32 5
860 %res6 = insertelement <8 x i16> %res5, i16 0, i32 6
861 %res7 = insertelement <8 x i16> %res6, i16 0, i32 7
862 ret <8 x i16> %res7
863}
864
; Loads the i8 values at byte offsets 0, 1, 3 through 13 and 15 of %ptr into
; the same-numbered lanes of an undef <16 x i8>; lanes 2 and 14 are never
; written and stay undef.
; The SSE, AVX and X86-SSE41 checks expect one movups/vmovups from offset 0;
; the X86-SSE1 checks expect scalar loads and stores of mixed widths.
Nikita Popov2f448bf2022-06-22 14:33:12 +0200865define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrimcff85502016-01-15 09:52:50 +0000866; SSE-LABEL: merge_16i8_i8_01u3456789ABCDuF:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000867; SSE: # %bb.0:
Simon Pilgrimcff85502016-01-15 09:52:50 +0000868; SSE-NEXT: movups (%rdi), %xmm0
869; SSE-NEXT: retq
870;
871; AVX-LABEL: merge_16i8_i8_01u3456789ABCDuF:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000872; AVX: # %bb.0:
Simon Pilgrimcff85502016-01-15 09:52:50 +0000873; AVX-NEXT: vmovups (%rdi), %xmm0
874; AVX-NEXT: retq
Simon Pilgrim528e94e2016-02-04 15:51:55 +0000875;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000876; X86-SSE1-LABEL: merge_16i8_i8_01u3456789ABCDuF:
877; X86-SSE1: # %bb.0:
878; X86-SSE1-NEXT: pushl %ebp
879; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
880; X86-SSE1-NEXT: pushl %ebx
881; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
882; X86-SSE1-NEXT: pushl %edi
883; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
884; X86-SSE1-NEXT: pushl %esi
885; X86-SSE1-NEXT: .cfi_def_cfa_offset 20
886; X86-SSE1-NEXT: .cfi_offset %esi, -20
887; X86-SSE1-NEXT: .cfi_offset %edi, -16
888; X86-SSE1-NEXT: .cfi_offset %ebx, -12
889; X86-SSE1-NEXT: .cfi_offset %ebp, -8
890; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
891; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
892; X86-SSE1-NEXT: movzwl (%ecx), %ebp
893; X86-SSE1-NEXT: movl 3(%ecx), %esi
894; X86-SSE1-NEXT: movl 7(%ecx), %edi
895; X86-SSE1-NEXT: movzwl 11(%ecx), %ebx
Sanjay Patelf0dd12e2022-07-19 21:25:41 -0400896; X86-SSE1-NEXT: movzbl 13(%ecx), %edx
897; X86-SSE1-NEXT: movzbl 15(%ecx), %ecx
Simon Pilgrim793192d2020-12-02 16:10:50 +0000898; X86-SSE1-NEXT: movb %dl, 13(%eax)
899; X86-SSE1-NEXT: movb %cl, 15(%eax)
900; X86-SSE1-NEXT: movw %bx, 11(%eax)
901; X86-SSE1-NEXT: movl %edi, 7(%eax)
902; X86-SSE1-NEXT: movl %esi, 3(%eax)
903; X86-SSE1-NEXT: movw %bp, (%eax)
904; X86-SSE1-NEXT: popl %esi
905; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
906; X86-SSE1-NEXT: popl %edi
907; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
908; X86-SSE1-NEXT: popl %ebx
909; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
910; X86-SSE1-NEXT: popl %ebp
911; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
912; X86-SSE1-NEXT: retl $4
Simon Pilgrimab7c46e2016-08-18 13:41:26 +0000913;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000914; X86-SSE41-LABEL: merge_16i8_i8_01u3456789ABCDuF:
915; X86-SSE41: # %bb.0:
916; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
917; X86-SSE41-NEXT: movups (%eax), %xmm0
918; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +0200919 %ptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
920 %ptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
921 %ptr4 = getelementptr inbounds i8, ptr %ptr, i64 4
922 %ptr5 = getelementptr inbounds i8, ptr %ptr, i64 5
923 %ptr6 = getelementptr inbounds i8, ptr %ptr, i64 6
924 %ptr7 = getelementptr inbounds i8, ptr %ptr, i64 7
925 %ptr8 = getelementptr inbounds i8, ptr %ptr, i64 8
926 %ptr9 = getelementptr inbounds i8, ptr %ptr, i64 9
927 %ptrA = getelementptr inbounds i8, ptr %ptr, i64 10
928 %ptrB = getelementptr inbounds i8, ptr %ptr, i64 11
929 %ptrC = getelementptr inbounds i8, ptr %ptr, i64 12
930 %ptrD = getelementptr inbounds i8, ptr %ptr, i64 13
931 %ptrF = getelementptr inbounds i8, ptr %ptr, i64 15
932 %val0 = load i8, ptr %ptr
933 %val1 = load i8, ptr %ptr1
934 %val3 = load i8, ptr %ptr3
935 %val4 = load i8, ptr %ptr4
936 %val5 = load i8, ptr %ptr5
937 %val6 = load i8, ptr %ptr6
938 %val7 = load i8, ptr %ptr7
939 %val8 = load i8, ptr %ptr8
940 %val9 = load i8, ptr %ptr9
941 %valA = load i8, ptr %ptrA
942 %valB = load i8, ptr %ptrB
943 %valC = load i8, ptr %ptrC
944 %valD = load i8, ptr %ptrD
945 %valF = load i8, ptr %ptrF
Simon Pilgrimcff85502016-01-15 09:52:50 +0000946 %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
947 %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
948 %res3 = insertelement <16 x i8> %res1, i8 %val3, i32 3
949 %res4 = insertelement <16 x i8> %res3, i8 %val4, i32 4
950 %res5 = insertelement <16 x i8> %res4, i8 %val5, i32 5
951 %res6 = insertelement <16 x i8> %res5, i8 %val6, i32 6
952 %res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
953 %res8 = insertelement <16 x i8> %res7, i8 %val8, i32 8
954 %res9 = insertelement <16 x i8> %res8, i8 %val9, i32 9
955 %resA = insertelement <16 x i8> %res9, i8 %valA, i32 10
956 %resB = insertelement <16 x i8> %resA, i8 %valB, i32 11
957 %resC = insertelement <16 x i8> %resB, i8 %valC, i32 12
958 %resD = insertelement <16 x i8> %resC, i8 %valD, i32 13
959 %resF = insertelement <16 x i8> %resD, i8 %valF, i32 15
960 ret <16 x i8> %resF
961}
962
; Loads the i8 values at byte offsets 0, 1 and 3 of %ptr into lanes 0, 1 and 3
; of an undef <16 x i8> and inserts constant zero into lanes 6, 7, 13, 14 and
; 15; every other lane stays undef.
; The SSE, AVX and X86-SSE41 checks expect a single movss/vmovss with the upper
; elements zeroed; the X86-SSE1 checks expect scalar copies plus explicit zero
; stores.
Nikita Popov2f448bf2022-06-22 14:33:12 +0200963define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrim6788f332016-02-04 16:12:56 +0000964; SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000965; SSE: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +0000966; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
Simon Pilgrim6788f332016-02-04 16:12:56 +0000967; SSE-NEXT: retq
Simon Pilgrimcff85502016-01-15 09:52:50 +0000968;
969; AVX-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000970; AVX: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +0000971; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
Simon Pilgrimcff85502016-01-15 09:52:50 +0000972; AVX-NEXT: retq
Simon Pilgrim528e94e2016-02-04 15:51:55 +0000973;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000974; X86-SSE1-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
975; X86-SSE1: # %bb.0:
976; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
977; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
978; X86-SSE1-NEXT: movzwl (%ecx), %edx
Sanjay Patelf0dd12e2022-07-19 21:25:41 -0400979; X86-SSE1-NEXT: movzbl 3(%ecx), %ecx
Simon Pilgrim793192d2020-12-02 16:10:50 +0000980; X86-SSE1-NEXT: movb %cl, 3(%eax)
981; X86-SSE1-NEXT: movw %dx, (%eax)
982; X86-SSE1-NEXT: movb $0, 15(%eax)
983; X86-SSE1-NEXT: movw $0, 13(%eax)
984; X86-SSE1-NEXT: movw $0, 6(%eax)
985; X86-SSE1-NEXT: retl $4
Simon Pilgrimab7c46e2016-08-18 13:41:26 +0000986;
Simon Pilgrim793192d2020-12-02 16:10:50 +0000987; X86-SSE41-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
988; X86-SSE41: # %bb.0:
989; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
990; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
991; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +0200992 %ptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
993 %ptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
994 %val0 = load i8, ptr %ptr
995 %val1 = load i8, ptr %ptr1
996 %val3 = load i8, ptr %ptr3
Simon Pilgrimcff85502016-01-15 09:52:50 +0000997 %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
998 %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
999 %res3 = insertelement <16 x i8> %res1, i8 %val3, i32 3
1000 %res6 = insertelement <16 x i8> %res3, i8 0, i32 6
1001 %res7 = insertelement <16 x i8> %res6, i8 0, i32 7
1002 %resD = insertelement <16 x i8> %res7, i8 0, i32 13
1003 %resE = insertelement <16 x i8> %resD, i8 0, i32 14
1004 %resF = insertelement <16 x i8> %resE, i8 0, i32 15
1005 ret <16 x i8> %resF
1006}
1007
; Loads the i8 values at byte offsets 0 through 3, 6 and 7 of %ptr into the
; same-numbered lanes of an undef <16 x i8> and inserts constant zero into
; lanes 13, 14 and 15; every other lane stays undef.
; The SSE, AVX and X86-SSE41 checks expect a single movsd/vmovsd whose upper
; half is zero; the X86-SSE1 checks expect scalar copies plus explicit zero
; stores.
Nikita Popov2f448bf2022-06-22 14:33:12 +02001008define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrim46696ef2016-01-26 09:30:08 +00001009; SSE-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001010; SSE: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +00001011; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
Simon Pilgrim46696ef2016-01-26 09:30:08 +00001012; SSE-NEXT: retq
Simon Pilgrimcff85502016-01-15 09:52:50 +00001013;
1014; AVX-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001015; AVX: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +00001016; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
Simon Pilgrimcff85502016-01-15 09:52:50 +00001017; AVX-NEXT: retq
Simon Pilgrim528e94e2016-02-04 15:51:55 +00001018;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001019; X86-SSE1-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
1020; X86-SSE1: # %bb.0:
1021; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
1022; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
1023; X86-SSE1-NEXT: movl (%ecx), %edx
1024; X86-SSE1-NEXT: movzwl 6(%ecx), %ecx
1025; X86-SSE1-NEXT: movw %cx, 6(%eax)
1026; X86-SSE1-NEXT: movl %edx, (%eax)
1027; X86-SSE1-NEXT: movb $0, 15(%eax)
1028; X86-SSE1-NEXT: movw $0, 13(%eax)
1029; X86-SSE1-NEXT: retl $4
Simon Pilgrimab7c46e2016-08-18 13:41:26 +00001030;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001031; X86-SSE41-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
1032; X86-SSE41: # %bb.0:
1033; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1034; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1035; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +02001036 %ptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
1037 %ptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
1038 %ptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
1039 %ptr6 = getelementptr inbounds i8, ptr %ptr, i64 6
1040 %ptr7 = getelementptr inbounds i8, ptr %ptr, i64 7
1041 %val0 = load i8, ptr %ptr
1042 %val1 = load i8, ptr %ptr1
1043 %val2 = load i8, ptr %ptr2
1044 %val3 = load i8, ptr %ptr3
1045 %val6 = load i8, ptr %ptr6
1046 %val7 = load i8, ptr %ptr7
Simon Pilgrimcff85502016-01-15 09:52:50 +00001047 %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
1048 %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
1049 %res2 = insertelement <16 x i8> %res1, i8 %val2, i32 2
1050 %res3 = insertelement <16 x i8> %res2, i8 %val3, i32 3
1051 %res6 = insertelement <16 x i8> %res3, i8 %val6, i32 6
1052 %res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
1053 %resD = insertelement <16 x i8> %res7, i8 0, i32 13
1054 %resE = insertelement <16 x i8> %resD, i8 0, i32 14
Simon Pilgrim63b1eca2016-02-06 15:38:25 +00001055 %resF = insertelement <16 x i8> %resE, i8 0, i32 15
1056 ret <16 x i8> %resF
1057}
1058
; Loads one i32 from %src, splats it across a <4 x i32>, shifts it right by
; <0, undef, undef, undef>, masks it with <-1, 0, 0, 0> and stores the result
; to %dst.
; The SSE, AVX and X86-SSE41 checks expect just a zero-extending movss/vmovss
; load followed by an aligned vector store. The X86-SSE1 checks additionally
; expect an andps with a constant whose first element prints as NaN
; (presumably the all-ones bit pattern of the -1 mask lane).
Nikita Popov2f448bf2022-06-22 14:33:12 +02001059define void @merge_4i32_i32_combine(ptr %dst, ptr %src) {
Simon Pilgrim63b1eca2016-02-06 15:38:25 +00001060; SSE-LABEL: merge_4i32_i32_combine:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001061; SSE: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +00001062; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1063; SSE-NEXT: movaps %xmm0, (%rdi)
Simon Pilgrim63b1eca2016-02-06 15:38:25 +00001064; SSE-NEXT: retq
1065;
Simon Pilgrim8893bd92016-12-07 12:10:49 +00001066; AVX-LABEL: merge_4i32_i32_combine:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001067; AVX: # %bb.0:
Simon Pilgrimd7518892016-12-15 16:05:29 +00001068; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1069; AVX-NEXT: vmovaps %xmm0, (%rdi)
Simon Pilgrim8893bd92016-12-07 12:10:49 +00001070; AVX-NEXT: retq
Simon Pilgrim63b1eca2016-02-06 15:38:25 +00001071;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001072; X86-SSE1-LABEL: merge_4i32_i32_combine:
1073; X86-SSE1: # %bb.0:
1074; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
1075; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
1076; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
Simon Pilgrima2a00892024-01-19 14:21:26 +00001077; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
Simon Pilgrim793192d2020-12-02 16:10:50 +00001078; X86-SSE1-NEXT: andps %xmm0, %xmm1
1079; X86-SSE1-NEXT: movaps %xmm1, (%eax)
1080; X86-SSE1-NEXT: retl
Simon Pilgrimab7c46e2016-08-18 13:41:26 +00001081;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001082; X86-SSE41-LABEL: merge_4i32_i32_combine:
1083; X86-SSE41: # %bb.0:
1084; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1085; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
1086; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1087; X86-SSE41-NEXT: movaps %xmm0, (%eax)
1088; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +02001089 %1 = load i32, ptr %src
1090 %2 = insertelement <4 x i32> undef, i32 %1, i32 0
1091 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
1092 %4 = lshr <4 x i32> %3, <i32 0, i32 undef, i32 undef, i32 undef>
1093 %5 = and <4 x i32> %4, <i32 -1, i32 0, i32 0, i32 0>
1094 store <4 x i32> %5, ptr %dst
Simon Pilgrim63b1eca2016-02-06 15:38:25 +00001095 ret void
1096}
Simon Pilgrim01108902016-03-24 00:14:37 +00001097
1098;
1099; A run of consecutive loads that includes any volatile load (one or all) must not be merged into a single wide load.
1100;
1101
; Builds a <2 x i64> from the i64 values at element offsets 1 and 2 of %ptr,
; where both loads are volatile.
; None of the check blocks expects a single 16-byte load: SSE and AVX expect
; two movsd/vmovsd loads joined by movlhps/vmovlhps, X86-SSE41 expects a movd
; followed by three pinsrd loads, and X86-SSE1 expects four scalar movl loads.
Nikita Popov2f448bf2022-06-22 14:33:12 +02001102define <2 x i64> @merge_2i64_i64_12_volatile(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrim01108902016-03-24 00:14:37 +00001103; SSE-LABEL: merge_2i64_i64_12_volatile:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001104; SSE: # %bb.0:
Craig Topper87f73812017-09-18 03:29:54 +00001105; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1106; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
Craig Toppera6054322017-09-18 04:40:58 +00001107; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim01108902016-03-24 00:14:37 +00001108; SSE-NEXT: retq
1109;
1110; AVX-LABEL: merge_2i64_i64_12_volatile:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001111; AVX: # %bb.0:
Craig Topper87f73812017-09-18 03:29:54 +00001112; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1113; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
Craig Toppera6054322017-09-18 04:40:58 +00001114; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim01108902016-03-24 00:14:37 +00001115; AVX-NEXT: retq
1116;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001117; X86-SSE1-LABEL: merge_2i64_i64_12_volatile:
1118; X86-SSE1: # %bb.0:
1119; X86-SSE1-NEXT: pushl %edi
1120; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
1121; X86-SSE1-NEXT: pushl %esi
1122; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
1123; X86-SSE1-NEXT: .cfi_offset %esi, -12
1124; X86-SSE1-NEXT: .cfi_offset %edi, -8
1125; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
1126; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
1127; X86-SSE1-NEXT: movl 8(%ecx), %edx
1128; X86-SSE1-NEXT: movl 12(%ecx), %esi
1129; X86-SSE1-NEXT: movl 16(%ecx), %edi
1130; X86-SSE1-NEXT: movl 20(%ecx), %ecx
1131; X86-SSE1-NEXT: movl %ecx, 12(%eax)
1132; X86-SSE1-NEXT: movl %edi, 8(%eax)
1133; X86-SSE1-NEXT: movl %esi, 4(%eax)
1134; X86-SSE1-NEXT: movl %edx, (%eax)
1135; X86-SSE1-NEXT: popl %esi
1136; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
1137; X86-SSE1-NEXT: popl %edi
1138; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
1139; X86-SSE1-NEXT: retl $4
Simon Pilgrimab7c46e2016-08-18 13:41:26 +00001140;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001141; X86-SSE41-LABEL: merge_2i64_i64_12_volatile:
1142; X86-SSE41: # %bb.0:
1143; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1144; X86-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1145; X86-SSE41-NEXT: pinsrd $1, 12(%eax), %xmm0
1146; X86-SSE41-NEXT: pinsrd $2, 16(%eax), %xmm0
1147; X86-SSE41-NEXT: pinsrd $3, 20(%eax), %xmm0
1148; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +02001149 %ptr0 = getelementptr inbounds i64, ptr %ptr, i64 1
1150 %ptr1 = getelementptr inbounds i64, ptr %ptr, i64 2
1151 %val0 = load volatile i64, ptr %ptr0
1152 %val1 = load volatile i64, ptr %ptr1
Simon Pilgrim01108902016-03-24 00:14:37 +00001153 %res0 = insertelement <2 x i64> undef, i64 %val0, i32 0
1154 %res1 = insertelement <2 x i64> %res0, i64 %val1, i32 1
1155 ret <2 x i64> %res1
1156}
1157
; Builds a <4 x float> from the float values at element offsets 2 through 5 of
; %ptr, where only the first load (element 2) is volatile.
; None of the check blocks expects a single 16-byte load. SSE2 and X86-SSE1
; expect two movss loads combined with unpcklps, plus a movhps that loads the
; upper two lanes from memory; SSE41, AVX and X86-SSE41 expect a movss followed
; by three insertps/vinsertps loads from memory.
Nikita Popov2f448bf2022-06-22 14:33:12 +02001158define <4 x float> @merge_4f32_f32_2345_volatile(ptr %ptr) nounwind uwtable noinline ssp {
Simon Pilgrim01108902016-03-24 00:14:37 +00001159; SSE2-LABEL: merge_4f32_f32_2345_volatile:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001160; SSE2: # %bb.0:
Nirav Dave54e22f32017-03-14 00:34:14 +00001161; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
Nirav Dave54e22f32017-03-14 00:34:14 +00001162; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1163; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
Craig Toppere7532472019-07-06 17:59:57 +00001164; SSE2-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
Simon Pilgrim01108902016-03-24 00:14:37 +00001165; SSE2-NEXT: retq
1166;
1167; SSE41-LABEL: merge_4f32_f32_2345_volatile:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001168; SSE41: # %bb.0:
Simon Pilgrim01108902016-03-24 00:14:37 +00001169; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1170; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
1171; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
1172; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
1173; SSE41-NEXT: retq
1174;
1175; AVX-LABEL: merge_4f32_f32_2345_volatile:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001176; AVX: # %bb.0:
Simon Pilgrim01108902016-03-24 00:14:37 +00001177; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1178; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
1179; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
1180; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
1181; AVX-NEXT: retq
1182;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001183; X86-SSE1-LABEL: merge_4f32_f32_2345_volatile:
1184; X86-SSE1: # %bb.0:
1185; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
1186; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1187; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1188; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1189; X86-SSE1-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
1190; X86-SSE1-NEXT: retl
Simon Pilgrimab7c46e2016-08-18 13:41:26 +00001191;
Simon Pilgrim793192d2020-12-02 16:10:50 +00001192; X86-SSE41-LABEL: merge_4f32_f32_2345_volatile:
1193; X86-SSE41: # %bb.0:
1194; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
1195; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1196; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
1197; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
1198; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
1199; X86-SSE41-NEXT: retl
Nikita Popov2f448bf2022-06-22 14:33:12 +02001200 %ptr0 = getelementptr inbounds float, ptr %ptr, i64 2
1201 %ptr1 = getelementptr inbounds float, ptr %ptr, i64 3
1202 %ptr2 = getelementptr inbounds float, ptr %ptr, i64 4
1203 %ptr3 = getelementptr inbounds float, ptr %ptr, i64 5
1204 %val0 = load volatile float, ptr %ptr0
1205 %val1 = load float, ptr %ptr1
1206 %val2 = load float, ptr %ptr2
1207 %val3 = load float, ptr %ptr3
Simon Pilgrim01108902016-03-24 00:14:37 +00001208 %res0 = insertelement <4 x float> undef, float %val0, i32 0
1209 %res1 = insertelement <4 x float> %res0, float %val1, i32 1
1210 %res2 = insertelement <4 x float> %res1, float %val2, i32 2
1211 %res3 = insertelement <4 x float> %res2, float %val3, i32 3
1212 ret <4 x float> %res3
1213}
Simon Pilgrimc49bd2e2016-03-30 20:52:24 +00001214
;
; Non-consecutive test.
;
1218
; Builds the vector <val0, 0.0, val1, val1> from two scalar float loads that go
; through separate pointer arguments (%ptr0 and %ptr1), i.e. there is no known
; address relationship between them. The assertions below expect every run
; configuration to emit two scalar loads joined by a single shuffle instead of
; one wide vector load. The zero in lane 1 is expected to come for free from
; the scalar load of %val0, which already zeroes the upper lanes of its
; register (mem[0],zero,zero,zero).
define <4 x float> @merge_4f32_f32_X0YY(ptr %ptr0, ptr %ptr1) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_X0YY:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_X0YY:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0,0]
; AVX-NEXT:    retq
;
; X86-SSE-LABEL: merge_4f32_f32_X0YY:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; X86-SSE-NEXT:    retl
  %val0 = load float, ptr %ptr0, align 4
  %val1 = load float, ptr %ptr1, align 4
  ; Lane 0 = %val0, lane 1 = constant zero, lanes 2 and 3 both reuse %val1.
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.000000e+00, i32 1
  %res2 = insertelement <4 x float> %res1, float %val1, i32 2
  %res3 = insertelement <4 x float> %res2, float %val1, i32 3
  ret <4 x float> %res3
}
Simon Pilgrime5215752017-02-16 19:17:36 +00001250
;
; Extension tests.
;
1254
; PR31309
; Loads one i32, zero-extends it to i128 and reinterprets those 128 bits as
; <4 x i32>, so lane 0 holds the loaded value and lanes 1-3 are zero.
; In the run configurations whose assertions use XMM registers, the whole
; sequence is expected to fold into a single scalar load that zeroes the upper
; lanes (movss / vmovss, mem[0],zero,zero,zero).
; The 32-bit SSE1-only run instead expects the result to be written to memory
; through a pointer read from the stack: the loaded word at offset 0 and
; explicit zero stores at offsets 4, 8 and 12.
; NOTE(review): the `retl $4` there looks like an sret-style hidden return
; pointer being popped by the callee - confirm against the i686 calling
; convention if this matters.
define <4 x i32> @load_i32_zext_i128_v4i32(ptr %ptr) {
; SSE-LABEL: load_i32_zext_i128_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_zext_i128_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X86-SSE1-LABEL: load_i32_zext_i128_v4i32:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl (%ecx), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%eax)
; X86-SSE1-NEXT:    movl $0, 12(%eax)
; X86-SSE1-NEXT:    movl $0, 8(%eax)
; X86-SSE1-NEXT:    movl $0, 4(%eax)
; X86-SSE1-NEXT:    retl $4
;
; X86-SSE41-LABEL: load_i32_zext_i128_v4i32:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE41-NEXT:    retl
  %1 = load i32, ptr %ptr
  ; Widen with zeros, then view the 128-bit integer as four i32 lanes.
  %2 = zext i32 %1 to i128
  %3 = bitcast i128 %2 to <4 x i32>
  ret <4 x i32> %3
}