; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
;
; Just one 32-bit run to make sure we do reasonable things for i64 tzcnt.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-AVX --check-prefix=X32-AVX2

; Trailing-zero count on a <4 x i64> vector via @llvm.cttz.v4i64, called with its
; i1 flag operand set to 0 (LangRef - zero inputs then give a defined result).
; The per-configuration assembly expectations below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpandn %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512CDVL-LABEL: testv4i64:
; AVX512CDVL: # %bb.0:
; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vplzcntq %ymm0, %ymm0
; AVX512CDVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64]
; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; AVX512CDVL-NEXT: retq
;
; AVX512CD-LABEL: testv4i64:
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512CD-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64]
; AVX512CD-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; AVX512CD-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv4i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; BITALG_NOVLX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i64:
; BITALG: # %bb.0:
; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; BITALG-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: retq
;
; X32-AVX-LABEL: testv4i64:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: retl
  %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 0)
  ret <4 x i64> %out
}

; Trailing-zero count on a <4 x i64> vector via @llvm.cttz.v4i64, called with its
; i1 flag operand set to -1, i.e. true (LangRef - the result for a zero input is
; then not defined, so the backend may assume non-zero lanes).
; The per-configuration assembly expectations below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64u:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpandn %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64u:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512CDVL-LABEL: testv4i64u:
; AVX512CDVL: # %bb.0:
; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vplzcntq %ymm0, %ymm0
; AVX512CDVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64]
; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; AVX512CDVL-NEXT: retq
;
; AVX512CD-LABEL: testv4i64u:
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512CD-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64]
; AVX512CD-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; AVX512CD-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv4i64u:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i64u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; BITALG_NOVLX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i64u:
; BITALG: # %bb.0:
; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; BITALG-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: retq
;
; X32-AVX-LABEL: testv4i64u:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: retl
  %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 -1)
  ret <4 x i64> %out
}

; Trailing-zero count on an <8 x i32> vector via @llvm.cttz.v8i32, called with its
; i1 flag operand set to 0 (LangRef - zero inputs then give a defined result).
; The per-configuration assembly expectations below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpandn %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; AVX1-NEXT: vpsadbw %xmm4, %xmm6, %xmm6
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512CDVL-LABEL: testv8i32:
; AVX512CDVL: # %bb.0:
; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vplzcntd %ymm0, %ymm0
; AVX512CDVL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32]
; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; AVX512CDVL-NEXT: retq
;
; AVX512CD-LABEL: testv8i32:
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512CD-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32]
; AVX512CD-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; AVX512CD-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv8i32:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; BITALG_NOVLX-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv8i32:
; BITALG: # %bb.0:
; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; BITALG-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; BITALG-NEXT: retq
;
; X32-AVX-LABEL: testv8i32:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X32-AVX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; X32-AVX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X32-AVX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: retl
  %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 0)
  ret <8 x i32> %out
}

Simon Pilgrim68688c12015-07-19 17:09:43 +0000407define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000408; AVX1-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000409; AVX1: # %bb.0:
Sanjay Patel8f61a6e2017-05-26 15:33:18 +0000410; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000411; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
412; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm3
413; AVX1-NEXT: vpandn %xmm3, %xmm1, %xmm1
414; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
415; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
416; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
417; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
Sanjay Patel8f61a6e2017-05-26 15:33:18 +0000418; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000419; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
420; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
421; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
422; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
423; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
424; AVX1-NEXT: vpsadbw %xmm4, %xmm6, %xmm6
Sanjay Patel8f61a6e2017-05-26 15:33:18 +0000425; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
Simon Pilgrime76d7092018-10-13 12:12:06 +0000426; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
427; AVX1-NEXT: vpackuswb %xmm6, %xmm1, %xmm1
428; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2
429; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm0
430; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
431; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000432; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000433; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
434; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
435; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
436; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
437; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
Simon Pilgrimbdcd71d2016-11-29 14:18:51 +0000438; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
Simon Pilgrime76d7092018-10-13 12:12:06 +0000439; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
440; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
Sanjay Patel8f61a6e2017-05-26 15:33:18 +0000441; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000442; AVX1-NEXT: retq
443;
444; AVX2-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000445; AVX2: # %bb.0:
Simon Pilgrime76d7092018-10-13 12:12:06 +0000446; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
447; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1
448; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
449; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
450; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
451; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
452; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000453; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000454; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
455; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
456; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
457; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000458; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
Cong Hou5c1d0fd2015-11-24 19:51:26 +0000459; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000460; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
Cong Hou5c1d0fd2015-11-24 19:51:26 +0000461; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000462; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000463; AVX2-NEXT: retq
Craig Topper6990d612016-04-20 05:19:01 +0000464;
465; AVX512CDVL-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000466; AVX512CDVL: # %bb.0:
Simon Pilgrim3b18a972018-10-13 13:05:19 +0000467; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
468; AVX512CDVL-NEXT: vpaddd %ymm1, %ymm0, %ymm1
469; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper6990d612016-04-20 05:19:01 +0000470; AVX512CDVL-NEXT: vplzcntd %ymm0, %ymm0
Simon Pilgrim3b18a972018-10-13 13:05:19 +0000471; AVX512CDVL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32]
Craig Topper6990d612016-04-20 05:19:01 +0000472; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
473; AVX512CDVL-NEXT: retq
Craig Topper758524f2016-04-22 03:22:38 +0000474;
475; AVX512CD-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000476; AVX512CD: # %bb.0:
Simon Pilgrim3b18a972018-10-13 13:05:19 +0000477; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
478; AVX512CD-NEXT: vpaddd %ymm1, %ymm0, %ymm1
479; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper758524f2016-04-22 03:22:38 +0000480; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
Simon Pilgrim3b18a972018-10-13 13:05:19 +0000481; AVX512CD-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32]
Craig Topper758524f2016-04-22 03:22:38 +0000482; AVX512CD-NEXT: vpsubd %ymm0, %ymm1, %ymm0
483; AVX512CD-NEXT: retq
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000484;
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000485; AVX512VPOPCNTDQ-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000486; AVX512VPOPCNTDQ: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000487; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000488; AVX512VPOPCNTDQ-NEXT: vpaddd %ymm1, %ymm0, %ymm1
489; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000490; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
Puyan Lotfi10769692018-01-31 22:04:26 +0000491; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000492; AVX512VPOPCNTDQ-NEXT: retq
493;
Craig Topper196a5602017-12-16 02:40:28 +0000494; AVX512VPOPCNTDQVL-LABEL: testv8i32u:
495; AVX512VPOPCNTDQVL: # %bb.0:
Craig Topper196a5602017-12-16 02:40:28 +0000496; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000497; AVX512VPOPCNTDQVL-NEXT: vpaddd %ymm1, %ymm0, %ymm1
498; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper196a5602017-12-16 02:40:28 +0000499; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
500; AVX512VPOPCNTDQVL-NEXT: retq
501;
Craig Topper54ab0be2017-11-22 23:35:12 +0000502; BITALG_NOVLX-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000503; BITALG_NOVLX: # %bb.0:
Simon Pilgrime76d7092018-10-13 12:12:06 +0000504; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
505; BITALG_NOVLX-NEXT: vpaddd %ymm1, %ymm0, %ymm1
506; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimc4e53cf2018-10-12 14:18:47 +0000507; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000508; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
Craig Topper54ab0be2017-11-22 23:35:12 +0000509; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
510; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
511; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
512; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
513; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
514; BITALG_NOVLX-NEXT: retq
515;
516; BITALG-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000517; BITALG: # %bb.0:
Simon Pilgrime76d7092018-10-13 12:12:06 +0000518; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
519; BITALG-NEXT: vpaddd %ymm1, %ymm0, %ymm1
520; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimc4e53cf2018-10-12 14:18:47 +0000521; BITALG-NEXT: vpopcntb %ymm0, %ymm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000522; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
Craig Topper54ab0be2017-11-22 23:35:12 +0000523; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
524; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
525; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
526; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
527; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
528; BITALG-NEXT: retq
529;
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000530; X32-AVX-LABEL: testv8i32u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000531; X32-AVX: # %bb.0:
Simon Pilgrime76d7092018-10-13 12:12:06 +0000532; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
533; X32-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm1
534; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
535; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
536; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
537; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
538; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000539; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000540; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
541; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
542; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
543; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000544; X32-AVX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
545; X32-AVX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
546; X32-AVX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
547; X32-AVX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
548; X32-AVX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
549; X32-AVX-NEXT: retl
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000550 %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 -1)
551 ret <8 x i32> %out
552}
553
; testv16i16 - llvm.cttz on <16 x i16> with the second intrinsic operand set
; to i1 0 (per the LLVM LangRef this operand is the "zero input yields an
; undefined/poison result" flag, so here a zero element has a defined result).
; Every expected sequence below forms ~x & (x - 1) with vpcmpeqd (all-ones),
; vpaddw and vpandn (per 128-bit half on plain AVX) and then counts the set
; bits of that mask. The count is done with the vpshufb nibble lookup table
; followed by a vpsllw/vpaddb/vpsrlw fold of the two byte counts of each word,
; or, where the run line enables it, with vpopcntd on zero-extended words or
; with vpopcntw.
; The check lines are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
Simon Pilgrim68688c12015-07-19 17:09:43 +0000554define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000555; AVX1-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000556; AVX1: # %bb.0:
Simon Pilgrime76d7092018-10-13 12:12:06 +0000557; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
558; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2
559; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm2
560; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
561; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
562; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
563; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000564; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
Simon Pilgrime76d7092018-10-13 12:12:06 +0000565; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
566; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
567; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
568; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
569; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000570; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
571; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000572; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
573; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
574; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm1
575; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000576; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000577; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
578; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000579; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
580; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
581; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
582; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
583; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000584; AVX1-NEXT: retq
585;
586; AVX2-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000587; AVX2: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000588; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000589; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm1
590; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000591; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
592; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
593; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
594; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
595; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
596; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
597; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
598; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
599; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
600; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
601; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000602; AVX2-NEXT: retq
Craig Topper6990d612016-04-20 05:19:01 +0000603;
604; AVX512CDVL-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000605; AVX512CDVL: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000606; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000607; AVX512CDVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1
608; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +0000609; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
610; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
611; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
Craig Topper6990d612016-04-20 05:19:01 +0000612; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
613; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +0000614; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
Craig Topper6990d612016-04-20 05:19:01 +0000615; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
616; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
617; AVX512CDVL-NEXT: vpsllw $8, %ymm0, %ymm1
618; AVX512CDVL-NEXT: vpaddb %ymm0, %ymm1, %ymm0
619; AVX512CDVL-NEXT: vpsrlw $8, %ymm0, %ymm0
620; AVX512CDVL-NEXT: retq
Craig Topper758524f2016-04-22 03:22:38 +0000621;
622; AVX512CD-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000623; AVX512CD: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000624; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000625; AVX512CD-NEXT: vpaddw %ymm1, %ymm0, %ymm1
626; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper758524f2016-04-22 03:22:38 +0000627; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
628; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm2
629; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
630; AVX512CD-NEXT: vpshufb %ymm2, %ymm3, %ymm2
631; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
632; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
633; AVX512CD-NEXT: vpshufb %ymm0, %ymm3, %ymm0
634; AVX512CD-NEXT: vpaddb %ymm2, %ymm0, %ymm0
635; AVX512CD-NEXT: vpsllw $8, %ymm0, %ymm1
636; AVX512CD-NEXT: vpaddb %ymm0, %ymm1, %ymm0
637; AVX512CD-NEXT: vpsrlw $8, %ymm0, %ymm0
638; AVX512CD-NEXT: retq
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000639;
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000640; AVX512VPOPCNTDQ-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000641; AVX512VPOPCNTDQ: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000642; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000643; AVX512VPOPCNTDQ-NEXT: vpaddw %ymm1, %ymm0, %ymm1
644; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimafebd3f2017-07-02 19:32:37 +0000645; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
646; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
647; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000648; AVX512VPOPCNTDQ-NEXT: retq
649;
Craig Topper196a5602017-12-16 02:40:28 +0000650; AVX512VPOPCNTDQVL-LABEL: testv16i16:
651; AVX512VPOPCNTDQVL: # %bb.0:
Craig Topper196a5602017-12-16 02:40:28 +0000652; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000653; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1
654; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper196a5602017-12-16 02:40:28 +0000655; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
656; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
657; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
658; AVX512VPOPCNTDQVL-NEXT: retq
659;
Coby Tayree09a18aa2017-11-21 10:32:42 +0000660; BITALG_NOVLX-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000661; BITALG_NOVLX: # %bb.0:
Coby Tayree09a18aa2017-11-21 10:32:42 +0000662; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000663; BITALG_NOVLX-NEXT: vpaddw %ymm1, %ymm0, %ymm1
664; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Coby Tayree09a18aa2017-11-21 10:32:42 +0000665; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
Puyan Lotfi10769692018-01-31 22:04:26 +0000666; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
Coby Tayree09a18aa2017-11-21 10:32:42 +0000667; BITALG_NOVLX-NEXT: retq
668;
669; BITALG-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000670; BITALG: # %bb.0:
Coby Tayree09a18aa2017-11-21 10:32:42 +0000671; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000672; BITALG-NEXT: vpaddw %ymm1, %ymm0, %ymm1
673; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
Coby Tayree09a18aa2017-11-21 10:32:42 +0000674; BITALG-NEXT: vpopcntw %ymm0, %ymm0
675; BITALG-NEXT: retq
676;
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000677; X32-AVX-LABEL: testv16i16:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000678; X32-AVX: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000679; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000680; X32-AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm1
681; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000682; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
683; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
684; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
685; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
686; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
687; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
688; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
689; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
690; X32-AVX-NEXT: vpsllw $8, %ymm0, %ymm1
691; X32-AVX-NEXT: vpaddb %ymm0, %ymm1, %ymm0
692; X32-AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
693; X32-AVX-NEXT: retl
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000694 %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 0)
695 ret <16 x i16> %out
696}
697
; testv16i16u - llvm.cttz on <16 x i16> with the second intrinsic operand set
; to i1 -1, i.e. true (per the LLVM LangRef this operand is the "zero input
; yields an undefined/poison result" flag, so a zero element may produce any
; value here).
; The expected sequences below are the same instruction sequences as those
; checked for testv16i16 in this file. Each forms ~x & (x - 1) with vpcmpeqd
; (all-ones), vpaddw and vpandn and then counts the set bits, either through
; the vpshufb nibble lookup table or, where the run line enables it, with
; vpopcntd on zero-extended words or with vpopcntw.
; The check lines are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
Simon Pilgrim68688c12015-07-19 17:09:43 +0000698define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000699; AVX1-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000700; AVX1: # %bb.0:
Simon Pilgrime76d7092018-10-13 12:12:06 +0000701; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
702; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2
703; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm2
704; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
705; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
706; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
707; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000708; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
Simon Pilgrime76d7092018-10-13 12:12:06 +0000709; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
710; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
711; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
712; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
713; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000714; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
715; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000716; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
717; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
718; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm1
719; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000720; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000721; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
722; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000723; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
724; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
725; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
726; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
727; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000728; AVX1-NEXT: retq
729;
730; AVX2-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000731; AVX2: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000732; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000733; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm1
734; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000735; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
736; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
737; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
738; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
739; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
740; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
741; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
742; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
743; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
744; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
745; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000746; AVX2-NEXT: retq
Craig Topper6990d612016-04-20 05:19:01 +0000747;
748; AVX512CDVL-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000749; AVX512CDVL: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000750; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000751; AVX512CDVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1
752; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +0000753; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
754; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
755; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
Craig Topper6990d612016-04-20 05:19:01 +0000756; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
757; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +0000758; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
Craig Topper6990d612016-04-20 05:19:01 +0000759; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
760; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
761; AVX512CDVL-NEXT: vpsllw $8, %ymm0, %ymm1
762; AVX512CDVL-NEXT: vpaddb %ymm0, %ymm1, %ymm0
763; AVX512CDVL-NEXT: vpsrlw $8, %ymm0, %ymm0
764; AVX512CDVL-NEXT: retq
Craig Topper758524f2016-04-22 03:22:38 +0000765;
766; AVX512CD-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000767; AVX512CD: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000768; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000769; AVX512CD-NEXT: vpaddw %ymm1, %ymm0, %ymm1
770; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper758524f2016-04-22 03:22:38 +0000771; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
772; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm2
773; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
774; AVX512CD-NEXT: vpshufb %ymm2, %ymm3, %ymm2
775; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
776; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
777; AVX512CD-NEXT: vpshufb %ymm0, %ymm3, %ymm0
778; AVX512CD-NEXT: vpaddb %ymm2, %ymm0, %ymm0
779; AVX512CD-NEXT: vpsllw $8, %ymm0, %ymm1
780; AVX512CD-NEXT: vpaddb %ymm0, %ymm1, %ymm0
781; AVX512CD-NEXT: vpsrlw $8, %ymm0, %ymm0
782; AVX512CD-NEXT: retq
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000783;
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000784; AVX512VPOPCNTDQ-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000785; AVX512VPOPCNTDQ: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000786; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000787; AVX512VPOPCNTDQ-NEXT: vpaddw %ymm1, %ymm0, %ymm1
788; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimafebd3f2017-07-02 19:32:37 +0000789; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
790; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
791; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000792; AVX512VPOPCNTDQ-NEXT: retq
793;
Craig Topper196a5602017-12-16 02:40:28 +0000794; AVX512VPOPCNTDQVL-LABEL: testv16i16u:
795; AVX512VPOPCNTDQVL: # %bb.0:
Craig Topper196a5602017-12-16 02:40:28 +0000796; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000797; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1
798; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper196a5602017-12-16 02:40:28 +0000799; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
800; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
801; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
802; AVX512VPOPCNTDQVL-NEXT: retq
803;
Craig Topper54ab0be2017-11-22 23:35:12 +0000804; BITALG_NOVLX-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000805; BITALG_NOVLX: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +0000806; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000807; BITALG_NOVLX-NEXT: vpaddw %ymm1, %ymm0, %ymm1
808; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper54ab0be2017-11-22 23:35:12 +0000809; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
Puyan Lotfi10769692018-01-31 22:04:26 +0000810; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
Craig Topper54ab0be2017-11-22 23:35:12 +0000811; BITALG_NOVLX-NEXT: retq
812;
813; BITALG-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000814; BITALG: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +0000815; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000816; BITALG-NEXT: vpaddw %ymm1, %ymm0, %ymm1
817; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper54ab0be2017-11-22 23:35:12 +0000818; BITALG-NEXT: vpopcntw %ymm0, %ymm0
819; BITALG-NEXT: retq
820;
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000821; X32-AVX-LABEL: testv16i16u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000822; X32-AVX: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000823; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000824; X32-AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm1
825; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000826; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
827; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
828; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
829; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
830; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
831; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
832; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
833; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
834; X32-AVX-NEXT: vpsllw $8, %ymm0, %ymm1
835; X32-AVX-NEXT: vpaddb %ymm0, %ymm1, %ymm0
836; X32-AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
837; X32-AVX-NEXT: retl
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000838 %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 -1)
839 ret <16 x i16> %out
840}
841
; testv32i8 - llvm.cttz on <32 x i8> with the second intrinsic operand set to
; i1 0 (per the LLVM LangRef this operand is the "zero input yields an
; undefined/poison result" flag, so here a zero element has a defined result).
; Every expected sequence below forms ~x & (x - 1) with vpcmpeqd (all-ones),
; vpaddb and vpandn (per 128-bit half on plain AVX) and then counts the set
; bits of each byte. Only the two BITALG run lines expect a single vpopcntb;
; all other run lines, including the VPOPCNTDQ ones, expect the vpshufb
; nibble lookup table followed by vpaddb of the low and high nibble counts.
; The check lines are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
Simon Pilgrim68688c12015-07-19 17:09:43 +0000842define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000843; AVX1-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000844; AVX1: # %bb.0:
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000845; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000846; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
847; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm3
848; AVX1-NEXT: vpandn %xmm3, %xmm1, %xmm1
849; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
850; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
851; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
852; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
Sanjay Patel8f61a6e2017-05-26 15:33:18 +0000853; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000854; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
855; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
856; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
857; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm2
858; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm0
859; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
860; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000861; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000862; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
863; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000864; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000865; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
866; AVX1-NEXT: retq
867;
868; AVX2-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000869; AVX2: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000870; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000871; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm1
872; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000873; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
874; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
875; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
876; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
877; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
878; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
879; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
880; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000881; AVX2-NEXT: retq
Craig Topper6990d612016-04-20 05:19:01 +0000882;
883; AVX512CDVL-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000884; AVX512CDVL: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000885; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000886; AVX512CDVL-NEXT: vpaddb %ymm1, %ymm0, %ymm1
887; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +0000888; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
889; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
890; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
Craig Topper6990d612016-04-20 05:19:01 +0000891; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
892; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +0000893; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
Craig Topper6990d612016-04-20 05:19:01 +0000894; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
895; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
896; AVX512CDVL-NEXT: retq
Craig Topper758524f2016-04-22 03:22:38 +0000897;
898; AVX512CD-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000899; AVX512CD: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000900; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000901; AVX512CD-NEXT: vpaddb %ymm1, %ymm0, %ymm1
902; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper758524f2016-04-22 03:22:38 +0000903; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
904; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm2
905; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
906; AVX512CD-NEXT: vpshufb %ymm2, %ymm3, %ymm2
907; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
908; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
909; AVX512CD-NEXT: vpshufb %ymm0, %ymm3, %ymm0
910; AVX512CD-NEXT: vpaddb %ymm2, %ymm0, %ymm0
911; AVX512CD-NEXT: retq
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000912;
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000913; AVX512VPOPCNTDQ-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000914; AVX512VPOPCNTDQ: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000915; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000916; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm1, %ymm0, %ymm1
917; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +0000918; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
919; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2
920; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
921; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2
922; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
923; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
924; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0
925; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
926; AVX512VPOPCNTDQ-NEXT: retq
927;
Craig Topper196a5602017-12-16 02:40:28 +0000928; AVX512VPOPCNTDQVL-LABEL: testv32i8:
929; AVX512VPOPCNTDQVL: # %bb.0:
Craig Topper196a5602017-12-16 02:40:28 +0000930; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000931; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm1, %ymm0, %ymm1
932; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper196a5602017-12-16 02:40:28 +0000933; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
934; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
935; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
936; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
937; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
938; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
939; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
940; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
941; AVX512VPOPCNTDQVL-NEXT: retq
942;
Coby Tayree09a18aa2017-11-21 10:32:42 +0000943; BITALG_NOVLX-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000944; BITALG_NOVLX: # %bb.0:
Coby Tayree09a18aa2017-11-21 10:32:42 +0000945; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000946; BITALG_NOVLX-NEXT: vpaddb %ymm1, %ymm0, %ymm1
947; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Coby Tayree09a18aa2017-11-21 10:32:42 +0000948; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
Puyan Lotfi10769692018-01-31 22:04:26 +0000949; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
Coby Tayree09a18aa2017-11-21 10:32:42 +0000950; BITALG_NOVLX-NEXT: retq
951;
952; BITALG-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000953; BITALG: # %bb.0:
Coby Tayree09a18aa2017-11-21 10:32:42 +0000954; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000955; BITALG-NEXT: vpaddb %ymm1, %ymm0, %ymm1
956; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
Coby Tayree09a18aa2017-11-21 10:32:42 +0000957; BITALG-NEXT: vpopcntb %ymm0, %ymm0
958; BITALG-NEXT: retq
959;
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000960; X32-AVX-LABEL: testv32i8:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000961; X32-AVX: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +0000962; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000963; X32-AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm1
964; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrim4b5784c2016-10-21 10:50:52 +0000965; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
966; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
967; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
968; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
969; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
970; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
971; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
972; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
973; X32-AVX-NEXT: retl
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000974 %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 0)
975 ret <32 x i8> %out
976}
977
; testv32i8u - codegen test for llvm.cttz on a <32 x i8> argument with the
; intrinsic's second (i1) operand set to -1.
; In the expected output every configuration first forms ~x & (x - 1)
; (vpcmpeqd builds all-ones, vpaddb adds it, vpandn masks) and then counts
; the set bits of each byte. The BITALG configurations do the count with
; vpopcntb; all others use two vpshufb lookups into a 16-entry nibble table
; and add the halves. The AVX1 sequence does this separately on each
; 128-bit half (vextractf128 / vinsertf128).
Simon Pilgrim68688c12015-07-19 17:09:43 +0000978define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000979; AVX1-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +0000980; AVX1: # %bb.0:
Simon Pilgrim30d36cc2015-06-07 21:01:34 +0000981; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000982; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
983; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm3
984; AVX1-NEXT: vpandn %xmm3, %xmm1, %xmm1
985; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
986; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
987; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
988; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
Sanjay Patel8f61a6e2017-05-26 15:33:18 +0000989; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
Simon Pilgrime76d7092018-10-13 12:12:06 +0000990; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
991; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
992; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
993; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm2
994; AVX1-NEXT: vpandn %xmm2, %xmm0, %xmm0
995; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
996; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
Simon Pilgrimafa71f42015-09-19 13:22:57 +0000997; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
Simon Pilgrime76d7092018-10-13 12:12:06 +0000998; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
999; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +00001000; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +00001001; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1002; AVX1-NEXT: retq
1003;
1004; AVX2-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001005; AVX2: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +00001006; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001007; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1008; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrimafa71f42015-09-19 13:22:57 +00001009; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1010; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
1011; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1012; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
1013; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
1014; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
1015; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
1016; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
Simon Pilgrim30d36cc2015-06-07 21:01:34 +00001017; AVX2-NEXT: retq
Craig Topper6990d612016-04-20 05:19:01 +00001018;
1019; AVX512CDVL-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001020; AVX512CDVL: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +00001021; AVX512CDVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001022; AVX512CDVL-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1023; AVX512CDVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +00001024; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1025; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
1026; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
Craig Topper6990d612016-04-20 05:19:01 +00001027; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
1028; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
Gadi Haberb3ccb4e2016-12-28 10:12:48 +00001029; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
Craig Topper6990d612016-04-20 05:19:01 +00001030; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
1031; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1032; AVX512CDVL-NEXT: retq
Craig Topper758524f2016-04-22 03:22:38 +00001033;
1034; AVX512CD-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001035; AVX512CD: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +00001036; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001037; AVX512CD-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1038; AVX512CD-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper758524f2016-04-22 03:22:38 +00001039; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1040; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm2
1041; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1042; AVX512CD-NEXT: vpshufb %ymm2, %ymm3, %ymm2
1043; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
1044; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
1045; AVX512CD-NEXT: vpshufb %ymm0, %ymm3, %ymm0
1046; AVX512CD-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1047; AVX512CD-NEXT: retq
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001048;
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +00001049; AVX512VPOPCNTDQ-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001050; AVX512VPOPCNTDQ: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +00001051; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001052; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1053; AVX512VPOPCNTDQ-NEXT: vpandn %ymm1, %ymm0, %ymm0
Oren Ben Simhonf3cb5d62017-05-25 13:45:23 +00001054; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1055; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2
1056; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1057; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2
1058; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
1059; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
1060; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0
1061; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1062; AVX512VPOPCNTDQ-NEXT: retq
1063;
Craig Topper196a5602017-12-16 02:40:28 +00001064; AVX512VPOPCNTDQVL-LABEL: testv32i8u:
1065; AVX512VPOPCNTDQVL: # %bb.0:
Craig Topper196a5602017-12-16 02:40:28 +00001066; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001067; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1068; AVX512VPOPCNTDQVL-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper196a5602017-12-16 02:40:28 +00001069; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1070; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
1071; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1072; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
1073; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
1074; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
1075; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
1076; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1077; AVX512VPOPCNTDQVL-NEXT: retq
1078;
Craig Topper54ab0be2017-11-22 23:35:12 +00001079; BITALG_NOVLX-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001080; BITALG_NOVLX: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +00001081; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001082; BITALG_NOVLX-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1083; BITALG_NOVLX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper54ab0be2017-11-22 23:35:12 +00001084; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
Puyan Lotfi10769692018-01-31 22:04:26 +00001085; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
Craig Topper54ab0be2017-11-22 23:35:12 +00001086; BITALG_NOVLX-NEXT: retq
1087;
1088; BITALG-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001089; BITALG: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +00001090; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001091; BITALG-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1092; BITALG-NEXT: vpandn %ymm1, %ymm0, %ymm0
Craig Topper54ab0be2017-11-22 23:35:12 +00001093; BITALG-NEXT: vpopcntb %ymm0, %ymm0
1094; BITALG-NEXT: retq
1095;
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001096; X32-AVX-LABEL: testv32i8u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001097; X32-AVX: # %bb.0:
Sanjay Patel8bfeccc2017-06-26 14:19:26 +00001098; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
Simon Pilgrime76d7092018-10-13 12:12:06 +00001099; X32-AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm1
1100; X32-AVX-NEXT: vpandn %ymm1, %ymm0, %ymm0
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001101; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1102; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
1103; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1104; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
1105; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
1106; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
1107; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
1108; X32-AVX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1109; X32-AVX-NEXT: retl
Simon Pilgrim30d36cc2015-06-07 21:01:34 +00001110 %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 -1)
1111 ret <32 x i8> %out
1112}
1113
; foldv4i64 - constant-folding test: llvm.cttz of the constant vector
; <256, -1, 0, 255> (i1 operand 0) is expected to become a single constant
; vector load with no bit-counting instructions.
; Expected lanes are 8, 0, 64, 0, so the zero lane yields the full bit width.
; The 32-bit run shows the same constant as eight elements
; (presumably each i64 split into two 32-bit halves - confirm).
Simon Pilgrim68688c12015-07-19 17:09:43 +00001114define <4 x i64> @foldv4i64() nounwind {
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001115; AVX-LABEL: foldv4i64:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001116; AVX: # %bb.0:
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001117; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
1118; AVX-NEXT: retq
1119;
Craig Topper54ab0be2017-11-22 23:35:12 +00001120; BITALG_NOVLX-LABEL: foldv4i64:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001121; BITALG_NOVLX: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +00001122; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
1123; BITALG_NOVLX-NEXT: retq
1124;
1125; BITALG-LABEL: foldv4i64:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001126; BITALG: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +00001127; BITALG-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
1128; BITALG-NEXT: retq
1129;
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001130; X32-AVX-LABEL: foldv4i64:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001131; X32-AVX: # %bb.0:
Simon Pilgrimada0a4f2017-02-10 14:37:25 +00001132; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,0,0,64,0,0,0]
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001133; X32-AVX-NEXT: retl
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001134 %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
1135 ret <4 x i64> %out
1136}
1137
; foldv4i64u - same constant input as foldv4i64 (<256, -1, 0, 255>) but with
; the intrinsic's i1 operand set to -1.
; The expected folded constant is unchanged (8, 0, 64, 0): the zero lane is
; still checked as 64 even with the flag set.
; The 32-bit run shows the constant as eight elements
; (presumably each i64 split into two 32-bit halves - confirm).
Simon Pilgrim68688c12015-07-19 17:09:43 +00001138define <4 x i64> @foldv4i64u() nounwind {
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001139; AVX-LABEL: foldv4i64u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001140; AVX: # %bb.0:
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001141; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
1142; AVX-NEXT: retq
1143;
Craig Topper54ab0be2017-11-22 23:35:12 +00001144; BITALG_NOVLX-LABEL: foldv4i64u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001145; BITALG_NOVLX: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +00001146; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
1147; BITALG_NOVLX-NEXT: retq
1148;
1149; BITALG-LABEL: foldv4i64u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001150; BITALG: # %bb.0:
Craig Topper54ab0be2017-11-22 23:35:12 +00001151; BITALG-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
1152; BITALG-NEXT: retq
1153;
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001154; X32-AVX-LABEL: foldv4i64u:
Francis Visoiu Mistrihca0df552017-12-04 17:18:51 +00001155; X32-AVX: # %bb.0:
Simon Pilgrimada0a4f2017-02-10 14:37:25 +00001156; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,0,0,64,0,0,0]
Simon Pilgrim4b5784c2016-10-21 10:50:52 +00001157; X32-AVX-NEXT: retl
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001158 %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
1159 ret <4 x i64> %out
1160}
1161
Simon Pilgrim68688c12015-07-19 17:09:43 +00001162define <8 x i32> @foldv8i32() nounwind {
Simon Pilgrimcadc63b2018-10-12 13:24:51 +00001163; ALL-LABEL: foldv8i32:
1164; ALL: # %bb.0:
1165; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
1166; ALL-NEXT: ret{{[l|q]}}
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001167 %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
1168 ret <8 x i32> %out
1169}
1170
Simon Pilgrim68688c12015-07-19 17:09:43 +00001171define <8 x i32> @foldv8i32u() nounwind {
Simon Pilgrimcadc63b2018-10-12 13:24:51 +00001172; ALL-LABEL: foldv8i32u:
1173; ALL: # %bb.0:
1174; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
1175; ALL-NEXT: ret{{[l|q]}}
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001176 %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
1177 ret <8 x i32> %out
1178}
1179
Simon Pilgrim68688c12015-07-19 17:09:43 +00001180define <16 x i16> @foldv16i16() nounwind {
Simon Pilgrimcadc63b2018-10-12 13:24:51 +00001181; ALL-LABEL: foldv16i16:
1182; ALL: # %bb.0:
1183; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
1184; ALL-NEXT: ret{{[l|q]}}
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001185 %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
1186 ret <16 x i16> %out
1187}
1188
Simon Pilgrim68688c12015-07-19 17:09:43 +00001189define <16 x i16> @foldv16i16u() nounwind {
Simon Pilgrimcadc63b2018-10-12 13:24:51 +00001190; ALL-LABEL: foldv16i16u:
1191; ALL: # %bb.0:
1192; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
1193; ALL-NEXT: ret{{[l|q]}}
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001194 %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
1195 ret <16 x i16> %out
1196}
1197
Simon Pilgrim68688c12015-07-19 17:09:43 +00001198define <32 x i8> @foldv32i8() nounwind {
Simon Pilgrimcadc63b2018-10-12 13:24:51 +00001199; ALL-LABEL: foldv32i8:
1200; ALL: # %bb.0:
1201; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
1202; ALL-NEXT: ret{{[l|q]}}
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001203 %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
1204 ret <32 x i8> %out
1205}
1206
Simon Pilgrim68688c12015-07-19 17:09:43 +00001207define <32 x i8> @foldv32i8u() nounwind {
Simon Pilgrimcadc63b2018-10-12 13:24:51 +00001208; ALL-LABEL: foldv32i8u:
1209; ALL: # %bb.0:
1210; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
1211; ALL-NEXT: ret{{[l|q]}}
Simon Pilgrimd72b3572015-06-08 09:57:09 +00001212 %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
1213 ret <32 x i8> %out
1214}
1215
; Declarations of the llvm.cttz intrinsic overloads called by the tests in
; this file, one per 256-bit vector type. Each takes the input vector and an
; i1 operand, which the tests pass as either 0 or -1.
Simon Pilgrim30d36cc2015-06-07 21:01:34 +00001216declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
1217declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
1218declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
1219declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)