| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+popcnt | FileCheck %s --check-prefix=BW |
| ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+avx512dq,+popcnt | FileCheck %s --check-prefix=DQ |
| |
| ; Test (zext (and (trunc x) C)) -> (and x C) fold with AVX512 mask registers. |
| ; Ensures "andb $7, %al; movzbl %al, %eax" is folded to "andl $7, %eax". |
| ; Without AVX512DQ: bitcast v16i1->i16 + truncate i16->i8 (TRUNCATE path). |
| ; With AVX512DQ: extract_subvector v16i1->v8i1 + bitcast v8i1->i8, which |
| ; visitBITCAST canonicalises to the same truncate form before the fold fires. |
| |
| ; Loads a <3 x i8> vector from %p, tests each lane for inequality with 61, |
| ; reinterprets the <3 x i1> result as an i3, and returns the population count |
| ; of that i3 zero-extended to i8. |
| ; The expected output is the same under both check prefixes: the compare mask |
| ; is copied to a general-purpose register with kmovd, reduced to its low three |
| ; bits by one 32-bit "andl $7", and then fed to popcntl. |
| define i8 @ctpop_aext_i3_v3i1(ptr %p) { |
| ; BW-LABEL: ctpop_aext_i3_v3i1: |
| ; BW: # %bb.0: |
| ; BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61] |
| ; BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 |
| ; BW-NEXT: kmovd %k0, %eax |
| ; BW-NEXT: andl $7, %eax |
| ; BW-NEXT: popcntl %eax, %eax |
| ; BW-NEXT: # kill: def $al killed $al killed $eax |
| ; BW-NEXT: vzeroupper |
| ; BW-NEXT: retq |
| ; |
| ; DQ-LABEL: ctpop_aext_i3_v3i1: |
| ; DQ: # %bb.0: |
| ; DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; DQ-NEXT: vpbroadcastb {{.*#+}} xmm1 = [61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61] |
| ; DQ-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 |
| ; DQ-NEXT: kmovd %k0, %eax |
| ; DQ-NEXT: andl $7, %eax |
| ; DQ-NEXT: popcntl %eax, %eax |
| ; DQ-NEXT: # kill: def $al killed $al killed $eax |
| ; DQ-NEXT: vzeroupper |
| ; DQ-NEXT: retq |
| ; Input: three i8 lanes read from the pointer argument. |
| %v = load <3 x i8>, ptr %p |
| ; Lane-wise "not equal to 61" test; yields one i1 per lane. |
| %cmp = icmp ne <3 x i8> %v, splat (i8 61) |
| ; Reinterpret the three i1 lanes as a single 3-bit integer. |
| %bc = bitcast <3 x i1> %cmp to i3 |
| ; Number of set bits, i.e. how many lanes compared unequal. |
| %ct = call i3 @llvm.ctpop.i3(i3 %bc) |
| ; Widen the 3-bit count to the i8 return type. |
| %ext = zext i3 %ct to i8 |
| ret i8 %ext |
| } |