blob: 78f525e4637bcef72899b8f46b3f2de7e9731a57 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+popcnt | FileCheck %s --check-prefix=BW
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+avx512dq,+popcnt | FileCheck %s --check-prefix=DQ
; Test (zext (and (trunc x) C)) -> (and x C) fold with AVX512 mask registers.
; Ensures "andb $7, %al; movzbl %al, %eax" is folded to "andl $7, %eax".
; Without AVX512DQ: bitcast v16i1->i16 + truncate i16->i8 (TRUNCATE path).
; With AVX512DQ: extract_subvector v16i1->v8i1 + bitcast v8i1->i8, which
; visitBITCAST canonicalises to the same truncate form before the fold fires.
; Counts how many of the three bytes at %p differ from 61 and returns that
; count as an i8. The compare mask is bitcast to i3, popcounted as i3, and
; then zero-extended. The BW and DQ configurations both expect the 3-bit
; mask to be isolated with one 32-bit "andl $7, %eax" right after the kmovd.
define i8 @ctpop_aext_i3_v3i1(ptr %p) {
; BW-LABEL: ctpop_aext_i3_v3i1:
; BW: # %bb.0:
; BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61]
; BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; BW-NEXT: kmovd %k0, %eax
; BW-NEXT: andl $7, %eax
; BW-NEXT: popcntl %eax, %eax
; BW-NEXT: # kill: def $al killed $al killed $eax
; BW-NEXT: vzeroupper
; BW-NEXT: retq
;
; DQ-LABEL: ctpop_aext_i3_v3i1:
; DQ: # %bb.0:
; DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; DQ-NEXT: vpbroadcastb {{.*#+}} xmm1 = [61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61]
; DQ-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; DQ-NEXT: kmovd %k0, %eax
; DQ-NEXT: andl $7, %eax
; DQ-NEXT: popcntl %eax, %eax
; DQ-NEXT: # kill: def $al killed $al killed $eax
; DQ-NEXT: vzeroupper
; DQ-NEXT: retq
  ; Load three bytes and test every lane for inequality with 61.
  %bytes = load <3 x i8>, ptr %p
  %ne61 = icmp ne <3 x i8> %bytes, <i8 61, i8 61, i8 61>
  ; Reinterpret the three i1 lane results as one 3-bit integer mask.
  %mask = bitcast <3 x i1> %ne61 to i3
  ; Population count in i3, then widen the result to the i8 return type.
  %bits = call i3 @llvm.ctpop.i3(i3 %mask)
  %wide = zext i3 %bits to i8
  ret i8 %wide
}